aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power20
-rw-r--r--Documentation/DocBook/filesystems.tmpl5
-rw-r--r--Documentation/RCU/whatisRCU.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/ce4100-i2c.txt93
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-cmos.txt28
-rw-r--r--Documentation/devicetree/bindings/x86/ce4100.txt38
-rw-r--r--Documentation/devicetree/bindings/x86/interrupt.txt26
-rw-r--r--Documentation/devicetree/bindings/x86/timer.txt6
-rw-r--r--Documentation/devicetree/booting-without-of.txt20
-rw-r--r--Documentation/hwmon/jc4221
-rw-r--r--Documentation/hwmon/k10temp8
-rw-r--r--Documentation/kernel-parameters.txt28
-rw-r--r--Documentation/keys-request-key.txt9
-rw-r--r--Documentation/keys.txt28
-rw-r--r--Documentation/memory-barriers.txt58
-rw-r--r--Documentation/networking/00-INDEX6
-rw-r--r--Documentation/networking/Makefile2
-rw-r--r--Documentation/networking/dns_resolver.txt9
-rw-r--r--Documentation/power/devices.txt94
-rw-r--r--Documentation/power/runtime_pm.txt13
-rw-r--r--Documentation/power/states.txt12
-rw-r--r--Documentation/rtc.txt29
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--Documentation/trace/ftrace-design.txt7
-rw-r--r--Documentation/trace/ftrace.txt151
-rw-r--r--Documentation/trace/kprobetrace.txt16
-rw-r--r--Documentation/workqueue.txt4
-rw-r--r--MAINTAINERS41
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/Kconfig1
-rw-r--r--arch/alpha/include/asm/fcntl.h2
-rw-r--r--arch/alpha/include/asm/futex.h29
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/irq.c13
-rw-r--r--arch/alpha/kernel/irq_alpha.c11
-rw-r--r--arch/alpha/kernel/irq_i8259.c18
-rw-r--r--arch/alpha/kernel/irq_impl.h8
-rw-r--r--arch/alpha/kernel/irq_pyxis.c20
-rw-r--r--arch/alpha/kernel/irq_srm.c16
-rw-r--r--arch/alpha/kernel/osf_sys.c36
-rw-r--r--arch/alpha/kernel/sys_alcor.c28
-rw-r--r--arch/alpha/kernel/sys_cabriolet.c16
-rw-r--r--arch/alpha/kernel/sys_dp264.c52
-rw-r--r--arch/alpha/kernel/sys_eb64p.c18
-rw-r--r--arch/alpha/kernel/sys_eiger.c14
-rw-r--r--arch/alpha/kernel/sys_jensen.c24
-rw-r--r--arch/alpha/kernel/sys_marvel.c42
-rw-r--r--arch/alpha/kernel/sys_mikasa.c16
-rw-r--r--arch/alpha/kernel/sys_noritake.c16
-rw-r--r--arch/alpha/kernel/sys_rawhide.c17
-rw-r--r--arch/alpha/kernel/sys_rx164.c16
-rw-r--r--arch/alpha/kernel/sys_sable.c20
-rw-r--r--arch/alpha/kernel/sys_takara.c14
-rw-r--r--arch/alpha/kernel/sys_titan.c22
-rw-r--r--arch/alpha/kernel/sys_wildfire.c32
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S5
-rw-r--r--arch/arm/Kconfig25
-rw-r--r--arch/arm/Makefile2
-rw-r--r--arch/arm/boot/compressed/.gitignore6
-rw-r--r--arch/arm/common/Kconfig2
-rw-r--r--arch/arm/include/asm/futex.h29
-rw-r--r--arch/arm/include/asm/hardware/cache-l2x0.h1
-rw-r--r--arch/arm/include/asm/hardware/sp810.h3
-rw-r--r--arch/arm/include/asm/mach/arch.h4
-rw-r--r--arch/arm/include/asm/pgalloc.h2
-rw-r--r--arch/arm/include/asm/tlb.h105
-rw-r--r--arch/arm/include/asm/tlbflush.h7
-rw-r--r--arch/arm/kernel/hw_breakpoint.c26
-rw-r--r--arch/arm/kernel/kprobes-decode.c2
-rw-r--r--arch/arm/kernel/pmu.c22
-rw-r--r--arch/arm/kernel/ptrace.c6
-rw-r--r--arch/arm/kernel/setup.c4
-rw-r--r--arch/arm/kernel/signal.c4
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/kernel/vmlinux.lds.S13
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/arm/mach-davinci/cpufreq.c2
-rw-r--r--arch/arm/mach-davinci/devices-da8xx.c7
-rw-r--r--arch/arm/mach-davinci/gpio-tnetv107x.c18
-rw-r--r--arch/arm/mach-davinci/include/mach/clkdev.h2
-rw-r--r--arch/arm/mach-omap2/clkt_dpll.c2
-rw-r--r--arch/arm/mach-omap2/mailbox.c12
-rw-r--r--arch/arm/mach-omap2/mux.c2
-rw-r--r--arch/arm/mach-omap2/pm-debug.c8
-rw-r--r--arch/arm/mach-omap2/prcm_mpu44xx.h4
-rw-r--r--arch/arm/mach-omap2/smartreflex.c37
-rw-r--r--arch/arm/mach-omap2/timer-gp.c13
-rw-r--r--arch/arm/mach-pxa/pxa25x.c1
-rw-r--r--arch/arm/mach-pxa/tosa-bt.c2
-rw-r--r--arch/arm/mach-pxa/tosa.c6
-rw-r--r--arch/arm/mach-s3c2440/Kconfig1
-rw-r--r--arch/arm/mach-s3c2440/include/mach/gta02.h26
-rw-r--r--arch/arm/mach-s3c64xx/clock.c6
-rw-r--r--arch/arm/mach-s3c64xx/dma.c11
-rw-r--r--arch/arm/mach-s3c64xx/gpiolib.c4
-rw-r--r--arch/arm/mach-s3c64xx/mach-smdk6410.c13
-rw-r--r--arch/arm/mach-s3c64xx/setup-keypad.c2
-rw-r--r--arch/arm/mach-s3c64xx/setup-sdhci.c2
-rw-r--r--arch/arm/mach-s5p6442/include/mach/map.h69
-rw-r--r--arch/arm/mach-s5p64x0/include/mach/gpio.h4
-rw-r--r--arch/arm/mach-s5p64x0/include/mach/map.h83
-rw-r--r--arch/arm/mach-s5pc100/include/mach/map.h193
-rw-r--r--arch/arm/mach-s5pv210/include/mach/map.h168
-rw-r--r--arch/arm/mach-s5pv210/mach-aquila.c15
-rw-r--r--arch/arm/mach-s5pv210/mach-goni.c15
-rw-r--r--arch/arm/mach-s5pv310/include/mach/map.h149
-rw-r--r--arch/arm/mach-shmobile/board-ag5evm.c1
-rw-r--r--arch/arm/mach-shmobile/board-ap4evb.c2
-rw-r--r--arch/arm/mach-shmobile/board-mackerel.c2
-rw-r--r--arch/arm/mach-shmobile/clock-sh73a0.c17
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-ap4evb.txt10
-rw-r--r--arch/arm/mach-shmobile/include/mach/head-mackerel.txt10
-rw-r--r--arch/arm/mach-spear3xx/include/mach/spear320.h2
-rw-r--r--arch/arm/mach-tegra/include/mach/kbc.h1
-rw-r--r--arch/arm/mm/cache-l2x0.c6
-rw-r--r--arch/arm/mm/proc-v7.S6
-rw-r--r--arch/arm/plat-omap/mailbox.c21
-rw-r--r--arch/arm/plat-s5p/dev-uart.c12
-rw-r--r--arch/arm/plat-samsung/dev-ts.c1
-rw-r--r--arch/arm/plat-samsung/dev-uart.c2
-rw-r--r--arch/arm/plat-spear/include/plat/uncompress.h4
-rw-r--r--arch/arm/plat-spear/include/plat/vmalloc.h2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/blackfin/kernel/vmlinux.lds.S2
-rw-r--r--arch/blackfin/lib/outs.S16
-rw-r--r--arch/blackfin/mach-common/cache.S2
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/smp.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/cris/kernel/vmlinux.lds.S7
-rw-r--r--arch/frv/include/asm/futex.h5
-rw-r--r--arch/frv/kernel/futex.c14
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/frv/kernel/vmlinux.lds.S2
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/futex.h15
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/include/asm/xen/hypercall.h2
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S2
-rw-r--r--arch/ia64/xen/suspend.c9
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m32r/kernel/vmlinux.lds.S2
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/microblaze/include/asm/futex.h31
-rw-r--r--arch/microblaze/include/asm/pci-bridge.h12
-rw-r--r--arch/microblaze/include/asm/prom.h15
-rw-r--r--arch/microblaze/kernel/prom_parse.c77
-rw-r--r--arch/microblaze/pci/pci-common.c1
-rw-r--r--arch/mips/Kconfig4
-rw-r--r--arch/mips/alchemy/mtx-1/board_setup.c4
-rw-r--r--arch/mips/alchemy/mtx-1/platform.c9
-rw-r--r--arch/mips/alchemy/xxs1500/board_setup.c4
-rw-r--r--arch/mips/include/asm/futex.h39
-rw-r--r--arch/mips/include/asm/perf_event.h12
-rw-r--r--arch/mips/kernel/ftrace.c179
-rw-r--r--arch/mips/kernel/perf_event.c345
-rw-r--r--arch/mips/kernel/perf_event_mipsxx.c4
-rw-r--r--arch/mips/kernel/signal.c2
-rw-r--r--arch/mips/kernel/signal32.c2
-rw-r--r--arch/mips/kernel/smp.c31
-rw-r--r--arch/mips/kernel/syscall.c5
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mips/loongson/Kconfig5
-rw-r--r--arch/mips/loongson/common/cmdline.c5
-rw-r--r--arch/mips/loongson/common/machtype.c3
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/ops-pmcmsp.c4
-rw-r--r--arch/mips/pmc-sierra/Kconfig4
-rw-r--r--arch/mips/pmc-sierra/msp71xx/msp_time.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h2
-rw-r--r--arch/mn10300/include/asm/uaccess.h5
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/mn10300/kernel/vmlinux.lds.S2
-rw-r--r--arch/mn10300/mm/cache-inv-icache.c4
-rw-r--r--arch/parisc/hpux/sys_hpux.c65
-rw-r--r--arch/parisc/include/asm/fcntl.h2
-rw-r--r--arch/parisc/include/asm/futex.h24
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/include/asm/futex.h27
-rw-r--r--arch/powerpc/include/asm/lppaca.h16
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h10
-rw-r--r--arch/powerpc/include/asm/prom.h15
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/powerpc/kernel/machine_kexec.c5
-rw-r--r--arch/powerpc/kernel/paca.c14
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/prom_parse.c84
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c2
-rw-r--r--arch/powerpc/platforms/iseries/dt.c6
-rw-r--r--arch/powerpc/platforms/iseries/setup.c1
-rw-r--r--arch/s390/boot/compressed/misc.c5
-rw-r--r--arch/s390/crypto/sha_common.c1
-rw-r--r--arch/s390/include/asm/atomic.h26
-rw-r--r--arch/s390/include/asm/cache.h1
-rw-r--r--arch/s390/include/asm/futex.h12
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S2
-rw-r--r--arch/s390/lib/uaccess.h8
-rw-r--r--arch/s390/lib/uaccess_pt.c17
-rw-r--r--arch/s390/lib/uaccess_std.c8
-rw-r--r--arch/sh/include/asm/futex-irq.h24
-rw-r--r--arch/sh/include/asm/futex.h11
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sh/include/asm/sections.h2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c13
-rw-r--r--arch/sh/kernel/vmlinux.lds.S2
-rw-r--r--arch/sh/lib/delay.c10
-rw-r--r--arch/sh/mm/cache.c3
-rw-r--r--arch/sparc/include/asm/fcntl.h2
-rw-r--r--arch/sparc/include/asm/futex_64.h20
-rw-r--r--arch/sparc/include/asm/pcr.h2
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/iommu.c5
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/pcr.c2
-rw-r--r--arch/sparc/kernel/smp_64.c2
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/kernel/una_asm_32.S4
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S2
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/sparc/lib/bitext.c5
-rw-r--r--arch/tile/include/asm/futex.h27
-rw-r--r--arch/tile/kernel/vmlinux.lds.S2
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/Kconfig.x865
-rw-r--r--arch/um/drivers/mconsole_kern.c21
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/include/asm/common.lds.S2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig35
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c7
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c24
-rw-r--r--arch/x86/ia32/ia32entry.S32
-rw-r--r--arch/x86/include/asm/acpi.h11
-rw-r--r--arch/x86/include/asm/amd_nb.h14
-rw-r--r--arch/x86/include/asm/apic.h42
-rw-r--r--arch/x86/include/asm/apicdef.h12
-rw-r--r--arch/x86/include/asm/bootparam.h1
-rw-r--r--arch/x86/include/asm/ce4100.h6
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/frame.h6
-rw-r--r--arch/x86/include/asm/futex.h22
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/init.h6
-rw-r--r--arch/x86/include/asm/io_apic.h44
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq.h3
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/kdebug.h1
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/msr-index.h8
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/numa.h52
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h23
-rw-r--r--arch/x86/include/asm/olpc_ofw.h14
-rw-r--r--arch/x86/include/asm/page_types.h9
-rw-r--r--arch/x86/include/asm/percpu.h48
-rw-r--r--arch/x86/include/asm/perf_event_p4.h1
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/prom.h70
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h20
-rw-r--r--arch/x86/include/asm/smpboot_hooks.h2
-rw-r--r--arch/x86/include/asm/system.h2
-rw-r--r--arch/x86/include/asm/topology.h19
-rw-r--r--arch/x86/include/asm/unistd_32.h5
-rw-r--r--arch/x86/include/asm/unistd_64.h6
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h15
-rw-r--r--arch/x86/include/asm/xen/page.h47
-rw-r--r--arch/x86/include/asm/xen/pci.h8
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c22
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/apb_timer.c62
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c150
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c388
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/asm-offsets.c65
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/check.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c13
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c170
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c175
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c417
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c97
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c19
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/devicetree.c441
-rw-r--r--arch/x86/kernel/dumpstack.c25
-rw-r--r--arch/x86/kernel/e820.c18
-rw-r--r--arch/x86/kernel/early-quirks.c16
-rw-r--r--arch/x86/kernel/entry_32.S11
-rw-r--r--arch/x86/kernel/entry_64.S13
-rw-r--r--arch/x86/kernel/ftrace.c15
-rw-r--r--arch/x86/kernel/head_32.S10
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/i8259.c2
-rw-r--r--arch/x86/kernel/ioport.c20
-rw-r--r--arch/x86/kernel/irq.c91
-rw-r--r--arch/x86/kernel/irqinit.c92
-rw-r--r--arch/x86/kernel/kgdb.c9
-rw-r--r--arch/x86/kernel/kprobes.c8
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/process.c9
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/rtc.c3
-rw-r--r--arch/x86/kernel/setup.c76
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c124
-rw-r--r--arch/x86/kernel/syscall_table_32.S3
-rw-r--r--arch/x86/kernel/vmlinux.lds.S5
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/kvm/svm.c2
-rw-r--r--arch/x86/kvm/trace.h8
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S6
-rw-r--r--arch/x86/lib/checksum_32.S63
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S59
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/lib/rwsem_64.S56
-rw-r--r--arch/x86/lib/semaphore_32.S38
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S27
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/amdtopology_64.c142
-rw-r--r--arch/x86/mm/fault.c14
-rw-r--r--arch/x86/mm/init.c56
-rw-r--r--arch/x86/mm/init_32.c11
-rw-r--r--arch/x86/mm/init_64.c78
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c10
-rw-r--r--arch/x86/mm/numa_64.c988
-rw-r--r--arch/x86/mm/numa_emulation.c494
-rw-r--r--arch/x86/mm/numa_internal.h31
-rw-r--r--arch/x86/mm/pageattr.c18
-rw-r--r--arch/x86/mm/pgtable.c11
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c367
-rw-r--r--arch/x86/mm/tlb.c14
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/ce4100.c9
-rw-r--r--arch/x86/pci/xen.c159
-rw-r--r--arch/x86/platform/ce4100/ce4100.c26
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts428
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/mrst/vrtc.c16
-rw-r--r--arch/x86/platform/olpc/Makefile4
-rw-r--r--arch/x86/platform/olpc/olpc_dt.c3
-rw-r--r--arch/x86/platform/uv/tlb_uv.c4
-rw-r--r--arch/x86/platform/uv/uv_irq.c4
-rw-r--r--arch/x86/platform/visws/visws_quirks.c4
-rw-r--r--arch/x86/xen/Kconfig10
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c84
-rw-r--r--arch/x86/xen/p2m.c330
-rw-r--r--arch/x86/xen/setup.c68
-rw-r--r--arch/x86/xen/smp.c38
-rw-r--r--arch/x86/xen/suspend.c8
-rw-r--r--arch/x86/xen/time.c4
-rw-r--r--arch/x86/xen/xen-head.S4
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--block/blk-core.c18
-rw-r--r--block/blk-flush.c8
-rw-r--r--block/blk-lib.c21
-rw-r--r--block/blk-throttle.c29
-rw-r--r--block/cfq-iosched.c6
-rw-r--r--block/elevator.c4
-rw-r--r--block/genhd.c2
-rw-r--r--block/ioctl.c8
-rw-r--r--crypto/ablkcipher.c3
-rw-r--r--crypto/tcrypt.c3
-rw-r--r--crypto/testmgr.c2
-rw-r--r--crypto/testmgr.h30
-rw-r--r--drivers/acpi/Kconfig1
-rw-r--r--drivers/acpi/acpica/aclocal.h7
-rw-r--r--drivers/acpi/acpica/evgpe.c17
-rw-r--r--drivers/acpi/acpica/evxfgpe.c42
-rw-r--r--drivers/acpi/bus.c23
-rw-r--r--drivers/acpi/debugfs.c20
-rw-r--r--drivers/acpi/numa.c9
-rw-r--r--drivers/acpi/osl.c6
-rw-r--r--drivers/acpi/sleep.c4
-rw-r--r--drivers/ata/Kconfig18
-rw-r--r--drivers/ata/Makefile1
-rw-r--r--drivers/ata/ahci.c8
-rw-r--r--drivers/ata/ahci.h6
-rw-r--r--drivers/ata/ata_generic.c2
-rw-r--r--drivers/ata/ata_piix.c2
-rw-r--r--drivers/ata/libata-acpi.c3
-rw-r--r--drivers/ata/libata-core.c54
-rw-r--r--drivers/ata/libata-eh.c60
-rw-r--r--drivers/ata/libata-scsi.c15
-rw-r--r--drivers/ata/libata-sff.c17
-rw-r--r--drivers/ata/libata.h1
-rw-r--r--drivers/ata/pata_acpi.c2
-rw-r--r--drivers/ata/pata_arasan_cf.c983
-rw-r--r--drivers/ata/pata_at32.c2
-rw-r--r--drivers/ata/pata_bf54x.c4
-rw-r--r--drivers/ata/pata_hpt366.c7
-rw-r--r--drivers/ata/pata_hpt37x.c23
-rw-r--r--drivers/ata/pata_hpt3x2n.c13
-rw-r--r--drivers/ata/pata_hpt3x3.c2
-rw-r--r--drivers/ata/pata_it821x.c4
-rw-r--r--drivers/ata/pata_ixp4xx_cf.c2
-rw-r--r--drivers/ata/pata_macio.c3
-rw-r--r--drivers/ata/pata_marvell.c2
-rw-r--r--drivers/ata/pata_ninja32.c2
-rw-r--r--drivers/ata/pata_octeon_cf.c3
-rw-r--r--drivers/ata/pata_palmld.c2
-rw-r--r--drivers/ata/pata_pcmcia.c2
-rw-r--r--drivers/ata/pata_pdc2027x.c6
-rw-r--r--drivers/ata/pata_pxa.c1
-rw-r--r--drivers/ata/pata_rb532_cf.c1
-rw-r--r--drivers/ata/pata_samsung_cf.c1
-rw-r--r--drivers/ata/pata_scc.c2
-rw-r--r--drivers/ata/pata_sis.c2
-rw-r--r--drivers/ata/pdc_adma.c4
-rw-r--r--drivers/ata/sata_dwc_460ex.c75
-rw-r--r--drivers/ata/sata_fsl.c22
-rw-r--r--drivers/ata/sata_mv.c3
-rw-r--r--drivers/ata/sata_nv.c14
-rw-r--r--drivers/ata/sata_promise.c4
-rw-r--r--drivers/ata/sata_qstor.c3
-rw-r--r--drivers/ata/sata_sil.c3
-rw-r--r--drivers/ata/sata_sil24.c3
-rw-r--r--drivers/ata/sata_sis.c2
-rw-r--r--drivers/ata/sata_svw.c12
-rw-r--r--drivers/ata/sata_sx4.c5
-rw-r--r--drivers/ata/sata_uli.c3
-rw-r--r--drivers/ata/sata_via.c9
-rw-r--r--drivers/ata/sata_vsc.c3
-rw-r--r--drivers/atm/solos-pci.c5
-rw-r--r--drivers/base/Makefile2
-rw-r--r--drivers/base/power/Makefile3
-rw-r--r--drivers/base/power/main.c175
-rw-r--r--drivers/base/power/opp.c2
-rw-r--r--drivers/base/power/power.h21
-rw-r--r--drivers/base/power/runtime.c37
-rw-r--r--drivers/base/power/sysfs.c78
-rw-r--r--drivers/base/power/trace.c6
-rw-r--r--drivers/base/power/wakeup.c109
-rw-r--r--drivers/base/syscore.c117
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c5
-rw-r--r--drivers/block/xen-blkfront.c87
-rw-r--r--drivers/bluetooth/ath3k.c5
-rw-r--r--drivers/bluetooth/btusb.c12
-rw-r--r--drivers/char/agp/amd64-agp.c9
-rw-r--r--drivers/char/agp/intel-agp.h1
-rw-r--r--drivers/char/agp/intel-gtt.c56
-rw-r--r--drivers/char/hw_random/Kconfig12
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/omap-rng.c14
-rw-r--r--drivers/char/hw_random/picoxcell-rng.c208
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c8
-rw-r--r--drivers/char/mmtimer.c30
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c3
-rw-r--r--drivers/char/pcmcia/ipwireless/main.c52
-rw-r--r--drivers/char/random.c13
-rw-r--r--drivers/char/tpm/tpm.c28
-rw-r--r--drivers/char/tpm/tpm.h2
-rw-r--r--drivers/char/tpm/tpm_tis.c4
-rw-r--r--drivers/char/virtio_console.c8
-rw-r--r--drivers/cpufreq/cpufreq.c27
-rw-r--r--drivers/cpufreq/cpufreq_conservative.c22
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c20
-rw-r--r--drivers/crypto/Kconfig17
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/omap-aes.c4
-rw-r--r--drivers/crypto/omap-sham.c4
-rw-r--r--drivers/crypto/picoxcell_crypto.c1867
-rw-r--r--drivers/crypto/picoxcell_crypto_regs.h128
-rw-r--r--drivers/gpio/ml_ioh_gpio.c1
-rw-r--r--drivers/gpio/pch_gpio.c1
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c4
-rw-r--r--drivers/gpu/drm/drm_irq.c29
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c4
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c11
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c17
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h24
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem_tiling.c21
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c6
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h2
-rw-r--r--drivers/gpu/drm/i915/intel_display.c103
-rw-r--r--drivers/gpu/drm/i915/intel_panel.c9
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.h13
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bios.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c18
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c1
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_dma.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mem.c6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_mm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_notifier.c11
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_pm.c2
-rw-r--r--drivers/gpu/drm/nouveau/nv04_dfp.c12
-rw-r--r--drivers/gpu/drm/nouveau/nv40_graph.c46
-rw-r--r--drivers/gpu/drm/nouveau/nv50_instmem.c8
-rw-r--r--drivers/gpu/drm/nouveau/nv50_vm.c4
-rw-r--r--drivers/gpu/drm/radeon/atombios_crtc.c4
-rw-r--r--drivers/gpu/drm/radeon/evergreen.c3
-rw-r--r--drivers/gpu/drm/radeon/evergreen_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/r100.c26
-rw-r--r--drivers/gpu/drm/radeon/r300.c2
-rw-r--r--drivers/gpu/drm/radeon/r600.c3
-rw-r--r--drivers/gpu/drm/radeon/r600_blit_kms.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_asic.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c2
-rw-r--r--drivers/gpu/drm/radeon/radeon_fb.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_legacy_crtc.c3
-rw-r--r--drivers/gpu/drm/radeon/radeon_ttm.c14
-rw-r--r--drivers/gpu/drm/radeon/rs600.c1
-rw-r--r--drivers/gpu/drm/radeon/rs690.c1
-rw-r--r--drivers/gpu/drm/radeon/rv770.c3
-rw-r--r--drivers/hwmon/Kconfig19
-rw-r--r--drivers/hwmon/ad7414.c1
-rw-r--r--drivers/hwmon/adt7411.c1
-rw-r--r--drivers/hwmon/f71882fg.c4
-rw-r--r--drivers/hwmon/jc42.c35
-rw-r--r--drivers/hwmon/k10temp.c5
-rw-r--r--drivers/hwmon/lm85.c23
-rw-r--r--drivers/i2c/busses/i2c-eg20t.c1
-rw-r--r--drivers/i2c/busses/i2c-ocores.c16
-rw-r--r--drivers/i2c/busses/i2c-omap.c39
-rw-r--r--drivers/i2c/busses/i2c-stu300.c2
-rw-r--r--drivers/i2c/i2c-core.c2
-rw-r--r--drivers/idle/intel_idle.c24
-rw-r--r--drivers/infiniband/core/cm.c20
-rw-r--r--drivers/infiniband/core/cma.c58
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c6
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c24
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c1
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c32
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c5
-rw-r--r--drivers/input/gameport/gameport.c2
-rw-r--r--drivers/input/keyboard/tegra-kbc.c62
-rw-r--r--drivers/input/mouse/synaptics.h23
-rw-r--r--drivers/input/serio/serio.c2
-rw-r--r--drivers/input/touchscreen/tps6507x-ts.c12
-rw-r--r--drivers/isdn/hardware/eicon/istream.c2
-rw-r--r--drivers/isdn/hisax/isdnl2.c28
-rw-r--r--drivers/md/linear.c1
-rw-r--r--drivers/md/md.c33
-rw-r--r--drivers/md/md.h2
-rw-r--r--drivers/md/multipath.c1
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c7
-rw-r--r--drivers/md/raid5.c1
-rw-r--r--drivers/media/common/tuners/tda8290.c14
-rw-r--r--drivers/media/dvb/dvb-usb/dib0700_devices.c21
-rw-r--r--drivers/media/dvb/dvb-usb/lmedm04.c6
-rw-r--r--drivers/media/dvb/frontends/dib7000m.c19
-rw-r--r--drivers/media/dvb/frontends/dib7000m.h15
-rw-r--r--drivers/media/dvb/mantis/mantis_pci.c1
-rw-r--r--drivers/media/rc/ir-raw.c3
-rw-r--r--drivers/media/rc/mceusb.c27
-rw-r--r--drivers/media/rc/nuvoton-cir.c5
-rw-r--r--drivers/media/rc/nuvoton-cir.h7
-rw-r--r--drivers/media/rc/rc-main.c2
-rw-r--r--drivers/media/video/au0828/au0828-video.c28
-rw-r--r--drivers/media/video/cx18/cx18-cards.c50
-rw-r--r--drivers/media/video/cx18/cx18-driver.c25
-rw-r--r--drivers/media/video/cx18/cx18-driver.h3
-rw-r--r--drivers/media/video/cx18/cx18-dvb.c38
-rw-r--r--drivers/media/video/cx23885/cx23885-i2c.c10
-rw-r--r--drivers/media/video/cx25840/cx25840-core.c3
-rw-r--r--drivers/media/video/ivtv/ivtv-irq.c58
-rw-r--r--drivers/media/video/mem2mem_testdev.c1
-rw-r--r--drivers/media/video/s2255drv.c10
-rw-r--r--drivers/memstick/core/memstick.c2
-rw-r--r--drivers/message/fusion/mptbase.h4
-rw-r--r--drivers/message/fusion/mptctl.c8
-rw-r--r--drivers/message/fusion/mptscsih.c7
-rw-r--r--drivers/message/i2o/driver.c3
-rw-r--r--drivers/mfd/asic3.c4
-rw-r--r--drivers/mfd/davinci_voicecodec.c4
-rw-r--r--drivers/mfd/tps6586x.c10
-rw-r--r--drivers/mfd/ucb1x00-ts.c12
-rw-r--r--drivers/mfd/wm8994-core.c18
-rw-r--r--drivers/misc/bmp085.c1
-rw-r--r--drivers/misc/iwmc3200top/iwmc3200top.h4
-rw-r--r--drivers/misc/iwmc3200top/main.c14
-rw-r--r--drivers/misc/tifm_core.c2
-rw-r--r--drivers/misc/vmw_balloon.c2
-rw-r--r--drivers/mmc/core/core.c2
-rw-r--r--drivers/mmc/core/sdio.c3
-rw-r--r--drivers/mmc/host/mmc_spi.c4
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c43
-rw-r--r--drivers/mtd/chips/jedec_probe.c35
-rw-r--r--drivers/mtd/maps/amd76xrom.c1
-rw-r--r--drivers/mtd/mtd_blkdevs.c1
-rw-r--r--drivers/mtd/nand/omap2.c2
-rw-r--r--drivers/mtd/nand/r852.c2
-rw-r--r--drivers/mtd/onenand/generic.c2
-rw-r--r--drivers/mtd/onenand/omap2.c2
-rw-r--r--drivers/mtd/sm_ftl.c2
-rw-r--r--drivers/net/ariadne.c5
-rw-r--r--drivers/net/bnx2x/bnx2x.h31
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.c87
-rw-r--r--drivers/net/bnx2x/bnx2x_cmn.h29
-rw-r--r--drivers/net/bnx2x/bnx2x_ethtool.c39
-rw-r--r--drivers/net/bnx2x/bnx2x_init.h2
-rw-r--r--drivers/net/bnx2x/bnx2x_main.c37
-rw-r--r--drivers/net/bnx2x/bnx2x_stats.c4
-rw-r--r--drivers/net/bonding/bond_3ad.c32
-rw-r--r--drivers/net/bonding/bond_3ad.h3
-rw-r--r--drivers/net/can/mcp251x.c2
-rw-r--r--drivers/net/can/softing/Kconfig2
-rw-r--r--drivers/net/can/softing/softing_main.c1
-rw-r--r--drivers/net/cnic.c33
-rw-r--r--drivers/net/cxgb4/t4_msg.h1
-rw-r--r--drivers/net/cxgb4vf/cxgb4vf_main.c80
-rw-r--r--drivers/net/cxgb4vf/t4vf_hw.c2
-rw-r--r--drivers/net/davinci_emac.c2
-rw-r--r--drivers/net/dm9000.c9
-rw-r--r--drivers/net/dnet.c3
-rw-r--r--drivers/net/e1000/e1000_osdep.h3
-rw-r--r--drivers/net/e1000e/netdev.c63
-rw-r--r--drivers/net/ethoc.c8
-rw-r--r--drivers/net/fec.c3
-rw-r--r--drivers/net/forcedeth.c2
-rw-r--r--drivers/net/igbvf/vf.c2
-rw-r--r--drivers/net/ixgbe/ixgbe_fcoe.c51
-rw-r--r--drivers/net/ixgbe/ixgbe_fcoe.h2
-rw-r--r--drivers/net/ixgbe/ixgbe_main.c6
-rw-r--r--drivers/net/macb.c2
-rw-r--r--drivers/net/macvtap.c3
-rw-r--r--drivers/net/pch_gbe/pch_gbe.h2
-rw-r--r--drivers/net/pch_gbe/pch_gbe_main.c106
-rw-r--r--drivers/net/pcmcia/fmvj18x_cs.c1
-rw-r--r--drivers/net/r6040.c115
-rw-r--r--drivers/net/r8169.c50
-rw-r--r--drivers/net/sfc/ethtool.c22
-rw-r--r--drivers/net/skge.c3
-rw-r--r--drivers/net/smsc911x.c5
-rw-r--r--drivers/net/stmmac/stmmac_main.c4
-rw-r--r--drivers/net/tg3.c8
-rw-r--r--drivers/net/usb/dm9601.c4
-rw-r--r--drivers/net/usb/hso.c12
-rw-r--r--drivers/net/usb/usbnet.c4
-rw-r--r--drivers/net/wireless/ath/ath5k/phy.c143
-rw-r--r--drivers/net/wireless/ath/ath9k/ath9k.h6
-rw-r--r--drivers/net/wireless/ath/ath9k/hif_usb.c9
-rw-r--r--drivers/net/wireless/ath/ath9k/init.c8
-rw-r--r--drivers/net/wireless/ath/ath9k/mac.c5
-rw-r--r--drivers/net/wireless/ath/ath9k/main.c8
-rw-r--r--drivers/net/wireless/ath/carl9170/usb.c2
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2100.c70
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2100.h1
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.c196
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.h2
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-3945.c67
-rw-r--r--drivers/net/wireless/iwlwifi/iwl-5000.c2
-rw-r--r--drivers/net/wireless/p54/p54pci.c14
-rw-r--r--drivers/net/wireless/p54/p54usb.c1
-rw-r--r--drivers/net/wireless/rndis_wlan.c3
-rw-r--r--drivers/net/wireless/rt2x00/rt2800pci.c8
-rw-r--r--drivers/net/wireless/rt2x00/rt2800usb.c6
-rw-r--r--drivers/nfc/Kconfig2
-rw-r--r--drivers/nfc/pn544.c4
-rw-r--r--drivers/of/Kconfig6
-rw-r--r--drivers/of/Makefile1
-rw-r--r--drivers/of/of_pci.c92
-rw-r--r--drivers/of/pdt.c112
-rw-r--r--drivers/pci/pci-driver.c4
-rw-r--r--drivers/pci/xen-pcifront.c31
-rw-r--r--drivers/pcmcia/pcmcia_resource.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.c2
-rw-r--r--drivers/pcmcia/pxa2xx_base.h1
-rw-r--r--drivers/pcmcia/pxa2xx_colibri.c3
-rw-r--r--drivers/pcmcia/pxa2xx_lubbock.c1
-rw-r--r--drivers/platform/x86/Kconfig2
-rw-r--r--drivers/platform/x86/acer-wmi.c4
-rw-r--r--drivers/platform/x86/asus_acpi.c8
-rw-r--r--drivers/platform/x86/dell-laptop.c24
-rw-r--r--drivers/platform/x86/intel_pmic_gpio.c116
-rw-r--r--drivers/platform/x86/tc1100-wmi.c2
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c8
-rw-r--r--drivers/pps/generators/Kconfig2
-rw-r--r--drivers/pps/kapi.c2
-rw-r--r--drivers/rapidio/rio-sysfs.c12
-rw-r--r--drivers/regulator/mc13xxx-regulator-core.c2
-rw-r--r--drivers/regulator/wm831x-dcdc.c1
-rw-r--r--drivers/rtc/Kconfig12
-rw-r--r--drivers/rtc/class.c7
-rw-r--r--drivers/rtc/interface.c203
-rw-r--r--drivers/rtc/rtc-at91rm9200.c28
-rw-r--r--drivers/rtc/rtc-at91sam9.c30
-rw-r--r--drivers/rtc/rtc-bfin.c27
-rw-r--r--drivers/rtc/rtc-cmos.c111
-rw-r--r--drivers/rtc/rtc-davinci.c55
-rw-r--r--drivers/rtc/rtc-dev.c104
-rw-r--r--drivers/rtc/rtc-ds1511.c17
-rw-r--r--drivers/rtc/rtc-ds1553.c17
-rw-r--r--drivers/rtc/rtc-ds3232.c32
-rw-r--r--drivers/rtc/rtc-jz4740.c7
-rw-r--r--drivers/rtc/rtc-mc13xxx.c7
-rw-r--r--drivers/rtc/rtc-mpc5121.c20
-rw-r--r--drivers/rtc/rtc-mrst.c33
-rw-r--r--drivers/rtc/rtc-mxc.c7
-rw-r--r--drivers/rtc/rtc-nuc900.c15
-rw-r--r--drivers/rtc/rtc-omap.c39
-rw-r--r--drivers/rtc/rtc-pcap.c6
-rw-r--r--drivers/rtc/rtc-pcf50633.c22
-rw-r--r--drivers/rtc/rtc-pl030.c6
-rw-r--r--drivers/rtc/rtc-pl031.c55
-rw-r--r--drivers/rtc/rtc-proc.c8
-rw-r--r--drivers/rtc/rtc-pxa.c44
-rw-r--r--drivers/rtc/rtc-rs5c372.c52
-rw-r--r--drivers/rtc/rtc-rx8025.c25
-rw-r--r--drivers/rtc/rtc-s3c.c41
-rw-r--r--drivers/rtc/rtc-sa1100.c160
-rw-r--r--drivers/rtc/rtc-sh.c24
-rw-r--r--drivers/rtc/rtc-stmp3xxx.c15
-rw-r--r--drivers/rtc/rtc-test.c13
-rw-r--r--drivers/rtc/rtc-twl.c13
-rw-r--r--drivers/rtc/rtc-vr41xx.c32
-rw-r--r--drivers/rtc/rtc-wm831x.c16
-rw-r--r--drivers/rtc/rtc-wm8350.c21
-rw-r--r--drivers/s390/block/dasd_eckd.c2
-rw-r--r--drivers/s390/block/xpram.c4
-rw-r--r--drivers/s390/char/keyboard.c3
-rw-r--r--drivers/s390/char/tape.h8
-rw-r--r--drivers/s390/char/tape_34xx.c59
-rw-r--r--drivers/s390/char/tape_3590.c83
-rw-r--r--drivers/scsi/Makefile2
-rw-r--r--drivers/scsi/be2iscsi/be_main.c2
-rw-r--r--drivers/scsi/ipr.c9
-rw-r--r--drivers/scsi/libsas/sas_ata.c94
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c14
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c5
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c10
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c12
-rw-r--r--drivers/scsi/scsi_debug.c2
-rw-r--r--drivers/scsi/scsi_lib.c2
-rw-r--r--drivers/scsi/scsi_priv.h2
-rw-r--r--drivers/scsi/scsi_sysfs.c2
-rw-r--r--drivers/scsi/scsi_tgt_lib.c2
-rw-r--r--drivers/scsi/scsi_transport_fc.c2
-rw-r--r--drivers/spi/pxa2xx_spi.c2
-rw-r--r--drivers/spi/pxa2xx_spi_pci.c63
-rw-r--r--drivers/spi/xilinx_spi.c6
-rw-r--r--drivers/target/Makefile3
-rw-r--r--drivers/target/target_core_configfs.c155
-rw-r--r--drivers/target/target_core_device.c13
-rw-r--r--drivers/target/target_core_fabric_configfs.c92
-rw-r--r--drivers/target/target_core_iblock.c8
-rw-r--r--drivers/target/target_core_mib.c1078
-rw-r--r--drivers/target/target_core_mib.h28
-rw-r--r--drivers/target/target_core_pscsi.c4
-rw-r--r--drivers/target/target_core_tmr.c5
-rw-r--r--drivers/target/target_core_tpg.c29
-rw-r--r--drivers/target/target_core_transport.c56
-rw-r--r--drivers/thermal/Kconfig1
-rw-r--r--drivers/thermal/thermal_sys.c40
-rw-r--r--drivers/tty/serial/max3100.c2
-rw-r--r--drivers/tty/serial/max3107.c2
-rw-r--r--drivers/tty/serial/serial_cs.c1
-rw-r--r--drivers/usb/core/hcd-pci.c4
-rw-r--r--drivers/usb/core/hub.c28
-rw-r--r--drivers/usb/core/quirks.c8
-rw-r--r--drivers/usb/gadget/f_phonet.c15
-rw-r--r--drivers/usb/host/ehci-xilinx-of.c1
-rw-r--r--drivers/usb/host/xhci-dbg.c9
-rw-r--r--drivers/usb/host/xhci-mem.c10
-rw-r--r--drivers/usb/host/xhci-ring.c40
-rw-r--r--drivers/usb/host/xhci.c14
-rw-r--r--drivers/usb/host/xhci.h2
-rw-r--r--drivers/usb/musb/musb_core.c1
-rw-r--r--drivers/usb/musb/musb_core.h17
-rw-r--r--drivers/usb/musb/omap2430.c1
-rw-r--r--drivers/usb/serial/sierra.c3
-rw-r--r--drivers/usb/serial/usb_wwan.c15
-rw-r--r--drivers/usb/serial/visor.c12
-rw-r--r--drivers/video/backlight/ltv350qv.c9
-rw-r--r--drivers/watchdog/cpwd.c2
-rw-r--r--drivers/watchdog/hpwdt.c4
-rw-r--r--drivers/watchdog/sbc_fitpc2_wdt.c7
-rw-r--r--drivers/watchdog/sch311x_wdt.c2
-rw-r--r--drivers/watchdog/w83697ug_wdt.c2
-rw-r--r--drivers/xen/balloon.c16
-rw-r--r--drivers/xen/events.c342
-rw-r--r--drivers/xen/manage.c143
-rw-r--r--drivers/xen/platform-pci.c3
-rw-r--r--fs/9p/acl.c28
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c114
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h53
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c316
-rw-r--r--fs/9p/vfs_inode.c307
-rw-r--r--fs/9p/vfs_inode_dotl.c198
-rw-r--r--fs/9p/vfs_super.c65
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c56
-rw-r--r--fs/block_dev.c30
-rw-r--r--fs/btrfs/ctree.h12
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c44
-rw-r--r--fs/btrfs/extent_io.c165
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c148
-rw-r--r--fs/btrfs/ioctl.c7
-rw-r--r--fs/btrfs/lzo.c21
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/super.c7
-rw-r--r--fs/btrfs/volumes.c13
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/cachefiles/namei.c52
-rw-r--r--fs/ceph/dir.c27
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/snap.c14
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/netmisc.c8
-rw-r--r--fs/cifs/sess.c8
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/ecryptfs/dentry.c22
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h3
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/ecryptfs/inode.c138
-rw-r--r--fs/eventfd.c12
-rw-r--r--fs/eventpoll.c95
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/namei.c17
-rw-r--r--fs/ext2/xattr.h6
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/namei.c15
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext3/xattr.h4
-rw-r--r--fs/ext3/xattr_security.c5
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c60
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/file.c52
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c77
-rw-r--r--fs/gfs2/glock.c414
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c17
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c63
-rw-r--r--fs/internal.h15
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/security.c5
-rw-r--r--fs/jffs2/write.c18
-rw-r--r--fs/jffs2/xattr.h5
-rw-r--r--fs/jfs/jfs_xattr.h5
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/xattr.c6
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1494
-rw-r--r--fs/namespace.c22
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c12
-rw-r--r--fs/nilfs2/btnode.c5
-rw-r--r--fs/nilfs2/btnode.h1
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/page.c13
-rw-r--r--fs/nilfs2/page.h1
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/nilfs2/super.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/journal.h6
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/super.c35
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c137
-rw-r--r--fs/partitions/ldm.c5
-rw-r--r--fs/partitions/mac.c17
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c15
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/xfs_fsops.c3
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--include/acpi/acpi_bus.h2
-rw-r--r--include/asm-generic/cputime.h3
-rw-r--r--include/asm-generic/fcntl.h4
-rw-r--r--include/asm-generic/futex.h7
-rw-r--r--include/asm-generic/pgtable.h2
-rw-r--r--include/asm-generic/sections.h1
-rw-r--r--include/asm-generic/unistd.h6
-rw-r--r--include/asm-generic/vmlinux.lds.h41
-rw-r--r--include/drm/drmP.h2
-rw-r--r--include/keys/rxrpc-type.h1
-rw-r--r--include/linux/ata.h163
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/blktrace_api.h1
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/cgroup.h4
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--include/linux/dcbnl.h2
-rw-r--r--include/linux/debugobjects.h5
-rw-r--r--include/linux/device.h8
-rw-r--r--include/linux/exportfs.h9
-rw-r--r--include/linux/ext3_fs.h3
-rw-r--r--include/linux/fcntl.h1
-rw-r--r--include/linux/file.h2
-rw-r--r--include/linux/freezer.h2
-rw-r--r--include/linux/fs.h46
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/ftrace_event.h2
-rw-r--r--include/linux/gfp.h11
-rw-r--r--include/linux/hrtimer.h24
-rw-r--r--include/linux/i2c.h2
-rw-r--r--include/linux/ima.h6
-rw-r--r--include/linux/interrupt.h85
-rw-r--r--include/linux/irq.h368
-rw-r--r--include/linux/irqdesc.h78
-rw-r--r--include/linux/jiffies.h1
-rw-r--r--include/linux/key-type.h14
-rw-r--r--include/linux/key.h5
-rw-r--r--include/linux/keyctl.h2
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/libata.h10
-rw-r--r--include/linux/list.h12
-rw-r--r--include/linux/mfd/wm8994/core.h1
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/module.h2
-rw-r--r--include/linux/namei.h7
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/nfs_fs_sb.h10
-rw-r--r--include/linux/of.h16
-rw-r--r--include/linux/of_pci.h9
-rw-r--r--include/linux/pata_arasan_cf_data.h49
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/percpu.h128
-rw-r--r--include/linux/perf_event.h50
-rw-r--r--include/linux/plist.h47
-rw-r--r--include/linux/pm.h21
-rw-r--r--include/linux/pm_runtime.h6
-rw-r--r--include/linux/pm_wakeup.h33
-rw-r--r--include/linux/posix-clock.h150
-rw-r--r--include/linux/posix-timers.h44
-rw-r--r--include/linux/ptrace.h3
-rw-r--r--include/linux/reiserfs_xattr.h2
-rw-r--r--include/linux/ring_buffer.h2
-rw-r--r--include/linux/rio_regs.h4
-rw-r--r--include/linux/rtc.h20
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h24
-rw-r--r--include/linux/security.h44
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/sunrpc/sched.h1
-rw-r--r--include/linux/syscalls.h20
-rw-r--r--include/linux/syscore_ops.h29
-rw-r--r--include/linux/sysctl.h14
-rw-r--r--include/linux/thermal.h8
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--include/linux/time.h14
-rw-r--r--include/linux/timex.h3
-rw-r--r--include/linux/workqueue.h12
-rw-r--r--include/linux/xattr.h2
-rw-r--r--include/net/9p/9p.h12
-rw-r--r--include/net/9p/client.h1
-rw-r--r--include/net/9p/transport.h9
-rw-r--r--include/net/ipv6.h12
-rw-r--r--include/net/netfilter/nf_tproxy_core.h12
-rw-r--r--include/net/sch_generic.h2
-rw-r--r--include/pcmcia/ds.h1
-rw-r--r--include/scsi/sas_ata.h22
-rw-r--r--include/sound/wm8903.h10
-rw-r--r--include/target/target_core_base.h28
-rw-r--r--include/target/target_core_transport.h4
-rw-r--r--include/trace/events/block.h6
-rw-r--r--include/trace/events/mce.h8
-rw-r--r--include/trace/events/module.h5
-rw-r--r--include/trace/events/skb.h4
-rw-r--r--include/xen/events.h8
-rw-r--r--include/xen/interface/io/blkif.h37
-rw-r--r--include/xen/interface/xen.h4
-rw-r--r--include/xen/xen-ops.h6
-rw-r--r--init/Kconfig22
-rw-r--r--kernel/audit_watch.c85
-rw-r--r--kernel/cgroup.c54
-rw-r--r--kernel/compat.c136
-rw-r--r--kernel/cpuset.c7
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c147
-rw-r--r--kernel/hrtimer.c90
-rw-r--r--kernel/irq/Kconfig39
-rw-r--r--kernel/irq/autoprobe.c54
-rw-r--r--kernel/irq/chip.c483
-rw-r--r--kernel/irq/compat.h72
-rw-r--r--kernel/irq/debug.h40
-rw-r--r--kernel/irq/handle.c144
-rw-r--r--kernel/irq/internals.h173
-rw-r--r--kernel/irq/irqdesc.c79
-rw-r--r--kernel/irq/manage.c604
-rw-r--r--kernel/irq/migration.c38
-rw-r--r--kernel/irq/pm.c30
-rw-r--r--kernel/irq/proc.c70
-rw-r--r--kernel/irq/resend.c19
-rw-r--r--kernel/irq/settings.h138
-rw-r--r--kernel/irq/spurious.c163
-rw-r--r--kernel/perf_event.c1023
-rw-r--r--kernel/pm_qos_params.c24
-rw-r--r--kernel/posix-cpu-timers.c110
-rw-r--r--kernel/posix-timers.c342
-rw-r--r--kernel/power/Kconfig237
-rw-r--r--kernel/power/hibernate.c9
-rw-r--r--kernel/power/main.c5
-rw-r--r--kernel/power/process.c6
-rw-r--r--kernel/power/snapshot.c15
-rw-r--r--kernel/power/suspend.c4
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c10
-rw-r--r--kernel/rcutiny_plugin.h2
-rw-r--r--kernel/rcutorture.c1
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c40
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c339
-rw-r--r--kernel/sched_autogroup.c15
-rw-r--r--kernel/sched_autogroup.h5
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c397
-rw-r--r--kernel/sched_idletask.c26
-rw-r--r--kernel/sched_rt.c33
-rw-r--r--kernel/sched_stoptask.c7
-rw-r--r--kernel/softirq.c24
-rw-r--r--kernel/sys.c4
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c31
-rw-r--r--kernel/sysctl_binary.c19
-rw-r--r--kernel/time.c35
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/jiffies.c20
-rw-r--r--kernel/time/ntp.c13
-rw-r--r--kernel/time/posix-clock.c451
-rw-r--r--kernel/time/tick-broadcast.c11
-rw-r--r--kernel/time/tick-common.c7
-rw-r--r--kernel/time/tick-internal.h12
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/time/timekeeping.c141
-rw-r--r--kernel/timer.c42
-rw-r--r--kernel/trace/blktrace.c16
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c38
-rw-r--r--kernel/trace/trace.h41
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events_filter.c885
-rw-r--r--kernel/trace/trace_kprobe.c111
-rw-r--r--kernel/trace/trace_output.c36
-rw-r--r--kernel/trace/trace_sched_switch.c48
-rw-r--r--kernel/trace/trace_syscalls.c42
-rw-r--r--kernel/workqueue.c49
-rw-r--r--lib/debugobjects.c9
-rw-r--r--lib/list_debug.c39
-rw-r--r--lib/nlattr.c2
-rw-r--r--lib/plist.c135
-rw-r--r--lib/rwsem.c10
-rw-r--r--lib/swiotlb.c6
-rw-r--r--mm/Makefile8
-rw-r--r--mm/bootmem.c180
-rw-r--r--mm/huge_memory.c34
-rw-r--r--mm/memory.c2
-rw-r--r--mm/mempolicy.c16
-rw-r--r--mm/migrate.c6
-rw-r--r--mm/mremap.c4
-rw-r--r--mm/nobootmem.c435
-rw-r--r--mm/page_alloc.c76
-rw-r--r--mm/rmap.c54
-rw-r--r--mm/shmem.c13
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmscan.c32
-rw-r--r--net/9p/Makefile1
-rw-r--r--net/9p/client.c166
-rw-r--r--net/9p/protocol.c44
-rw-r--r--net/9p/trans_common.c97
-rw-r--r--net/9p/trans_common.h32
-rw-r--r--net/9p/trans_fd.c52
-rw-r--r--net/9p/trans_virtio.c129
-rw-r--r--net/Makefile4
-rw-r--r--net/bluetooth/l2cap.c1
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br_input.c2
-rw-r--r--net/bridge/br_multicast.c42
-rw-r--r--net/bridge/br_private.h3
-rw-r--r--net/ceph/messenger.c133
-rw-r--r--net/ceph/pagevec.c18
-rw-r--r--net/core/dev.c21
-rw-r--r--net/core/dev_addr_lists.c2
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/scm.c2
-rw-r--r--net/dcb/dcbnl.c11
-rw-r--r--net/dccp/input.c7
-rw-r--r--net/dns_resolver/dns_key.c20
-rw-r--r--net/ipv4/devinet.c32
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c3
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c2
-rw-r--r--net/ipv6/route.c22
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/mac80211/iface.c1
-rw-r--r--net/mac80211/mlme.c6
-rw-r--r--net/mac80211/util.c2
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_log.c4
-rw-r--r--net/netfilter/nf_tproxy_core.c27
-rw-r--r--net/netfilter/xt_TPROXY.c22
-rw-r--r--net/netfilter/xt_socket.c13
-rw-r--r--net/netlink/af_netlink.c18
-rw-r--r--net/rds/ib.c9
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_rdma.c27
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/loop.c11
-rw-r--r--net/rxrpc/ar-input.c1
-rw-r--r--net/rxrpc/ar-key.c27
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sctp/sm_make_chunk.c10
-rw-r--r--net/sunrpc/sched.c77
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c1
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/unix/af_unix.c19
-rw-r--r--net/unix/garbage.c2
-rw-r--r--net/wireless/wext-compat.c4
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--scripts/basic/fixdep.c21
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--scripts/kconfig/streamline_config.pl2
-rw-r--r--scripts/mod/sumversion.c19
-rw-r--r--scripts/recordmcount.c3
-rwxr-xr-xscripts/recordmcount.pl1
-rw-r--r--scripts/rt-tester/rt-tester.py2
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst5
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst5
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst5
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst5
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst5
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst5
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst5
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst5
-rw-r--r--scripts/selinux/genheaders/genheaders.c20
-rw-r--r--security/apparmor/Makefile38
-rw-r--r--security/apparmor/lsm.c2
-rw-r--r--security/capability.c15
-rw-r--r--security/commoncap.c2
-rw-r--r--security/integrity/ima/ima.h3
-rw-r--r--security/integrity/ima/ima_api.c13
-rw-r--r--security/integrity/ima/ima_iint.c5
-rw-r--r--security/integrity/ima/ima_main.c136
-rw-r--r--security/keys/compat.c50
-rw-r--r--security/keys/encrypted.c3
-rw-r--r--security/keys/internal.h8
-rw-r--r--security/keys/key.c27
-rw-r--r--security/keys/keyctl.c143
-rw-r--r--security/keys/keyring.c4
-rw-r--r--security/keys/request_key.c2
-rw-r--r--security/keys/trusted.c3
-rw-r--r--security/keys/user_defined.c3
-rw-r--r--security/security.c21
-rw-r--r--security/selinux/hooks.c350
-rw-r--r--security/selinux/include/classmap.h7
-rw-r--r--security/selinux/include/security.h8
-rw-r--r--security/selinux/ss/avtab.h23
-rw-r--r--security/selinux/ss/ebitmap.h1
-rw-r--r--security/selinux/ss/mls.c5
-rw-r--r--security/selinux/ss/mls.h3
-rw-r--r--security/selinux/ss/policydb.c130
-rw-r--r--security/selinux/ss/policydb.h14
-rw-r--r--security/selinux/ss/services.c73
-rw-r--r--security/selinux/xfrm.c2
-rw-r--r--security/smack/smack.h17
-rw-r--r--security/smack/smack_access.c52
-rw-r--r--security/smack/smack_lsm.c287
-rw-r--r--security/smack/smackfs.c370
-rw-r--r--security/tomoyo/file.c5
-rw-r--r--sound/core/jack.c1
-rw-r--r--sound/pci/au88x0/au88x0_core.c14
-rw-r--r--sound/pci/hda/hda_intel.c1
-rw-r--r--sound/pci/hda/patch_cirrus.c2
-rw-r--r--sound/pci/hda/patch_conexant.c68
-rw-r--r--sound/pci/hda/patch_hdmi.c5
-rw-r--r--sound/pci/hda/patch_realtek.c9
-rw-r--r--sound/pci/hda/patch_sigmatel.c15
-rw-r--r--sound/pci/hda/patch_via.c2
-rw-r--r--sound/soc/codecs/cx20442.c2
-rw-r--r--sound/soc/codecs/wm8903.c2
-rw-r--r--sound/soc/codecs/wm8903.h2
-rw-r--r--sound/soc/codecs/wm8978.c14
-rw-r--r--sound/soc/codecs/wm8994.c249
-rw-r--r--sound/soc/codecs/wm9081.c5
-rw-r--r--sound/soc/codecs/wm_hubs.c3
-rw-r--r--sound/soc/imx/eukrea-tlv320.c2
-rw-r--r--sound/soc/omap/am3517evm.c2
-rw-r--r--sound/soc/pxa/e740_wm9705.c4
-rw-r--r--sound/soc/pxa/e750_wm9705.c4
-rw-r--r--sound/soc/pxa/e800_wm9712.c4
-rw-r--r--sound/soc/pxa/em-x270.c4
-rw-r--r--sound/soc/pxa/mioa701_wm9713.c4
-rw-r--r--sound/soc/pxa/palm27x.c4
-rw-r--r--sound/soc/pxa/tosa.c4
-rw-r--r--sound/soc/pxa/zylonite.c4
-rw-r--r--sound/soc/soc-dapm.c25
-rw-r--r--sound/usb/caiaq/audio.c2
-rw-r--r--sound/usb/caiaq/midi.c2
-rw-r--r--sound/usb/card.c4
-rw-r--r--sound/usb/pcm.c7
-rw-r--r--sound/usb/usbaudio.h1
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/Makefile19
-rw-r--r--tools/perf/Documentation/perf-list.txt23
-rw-r--r--tools/perf/Documentation/perf-lock.txt12
-rw-r--r--tools/perf/Documentation/perf-probe.txt26
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-stat.txt11
-rw-r--r--tools/perf/Makefile649
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c351
-rw-r--r--tools/perf/builtin-diff.c16
-rw-r--r--tools/perf/builtin-inject.c82
-rw-r--r--tools/perf/builtin-kmem.c10
-rw-r--r--tools/perf/builtin-list.c43
-rw-r--r--tools/perf/builtin-lock.c8
-rw-r--r--tools/perf/builtin-probe.c70
-rw-r--r--tools/perf/builtin-record.c450
-rw-r--r--tools/perf/builtin-report.c225
-rw-r--r--tools/perf/builtin-sched.c27
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c118
-rw-r--r--tools/perf/builtin-test.c184
-rw-r--r--tools/perf/builtin-timechart.c19
-rw-r--r--tools/perf/builtin-top.c1029
-rw-r--r--tools/perf/perf.h26
-rwxr-xr-xtools/perf/python/twatch.py41
-rw-r--r--tools/perf/util/annotate.c605
-rw-r--r--tools/perf/util/annotate.h103
-rw-r--r--tools/perf/util/build-id.c21
-rw-r--r--tools/perf/util/cache.h7
-rw-r--r--tools/perf/util/callchain.c227
-rw-r--r--tools/perf/util/callchain.h76
-rw-r--r--tools/perf/util/cgroup.c178
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/cpumap.c5
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c374
-rw-r--r--tools/perf/util/event.h78
-rw-r--r--tools/perf/util/evlist.c394
-rw-r--r--tools/perf/util/evlist.h68
-rw-r--r--tools/perf/util/evsel.c234
-rw-r--r--tools/perf/util/evsel.h47
-rw-r--r--tools/perf/util/exec_cmd.c19
-rw-r--r--tools/perf/util/header.c546
-rw-r--r--tools/perf/util/header.h95
-rw-r--r--tools/perf/util/hist.c257
-rw-r--r--tools/perf/util/hist.h60
-rw-r--r--tools/perf/util/include/linux/list.h1
-rw-r--r--tools/perf/util/parse-events.c175
-rw-r--r--tools/perf/util/parse-events.h12
-rw-r--r--tools/perf/util/probe-event.c159
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/probe-finder.c533
-rw-r--r--tools/perf/util/python.c896
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c266
-rw-r--r--tools/perf/util/session.h41
-rw-r--r--tools/perf/util/setup.py19
-rw-r--r--tools/perf/util/strfilter.c199
-rw-r--r--tools/perf/util/strfilter.h48
-rw-r--r--tools/perf/util/svghelper.c6
-rw-r--r--tools/perf/util/symbol.c7
-rw-r--r--tools/perf/util/symbol.h1
-rw-r--r--tools/perf/util/thread.c55
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/thread_map.c64
-rw-r--r--tools/perf/util/thread_map.h15
-rw-r--r--tools/perf/util/top.c238
-rw-r--r--tools/perf/util/top.h66
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/ui/browser.c25
-rw-r--r--tools/perf/util/ui/browser.h3
-rw-r--r--tools/perf/util/ui/browsers/annotate.c178
-rw-r--r--tools/perf/util/ui/browsers/hists.c197
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/ui/browsers/top.c213
-rw-r--r--tools/perf/util/ui/helpline.c5
-rw-r--r--tools/perf/util/ui/libslang.h6
-rw-r--r--tools/perf/util/ui/setup.c8
-rw-r--r--tools/perf/util/ui/ui.h8
-rw-r--r--tools/perf/util/ui/util.c7
-rw-r--r--tools/perf/util/util.h26
-rwxr-xr-xtools/testing/ktest/ktest.pl2
1452 files changed, 36362 insertions, 20929 deletions
diff --git a/.gitignore b/.gitignore
index 8faa6c02b39e..5d56a3fd0de6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ modules.builtin
*.gz
*.bz2
*.lzma
+*.xz
*.lzo
*.patch
*.gcno
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
index 7628cd1bc36a..8ffbc25376a0 100644
--- a/Documentation/ABI/testing/sysfs-devices-power
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -29,9 +29,8 @@ Description:
"disabled" to it.
For the devices that are not capable of generating system wakeup
- events this file contains "\n". In that cases the user space
- cannot modify the contents of this file and the device cannot be
- enabled to wake up the system.
+ events this file is not present. In that case the device cannot
+ be enabled to wake up the system from sleep states.
What: /sys/devices/.../power/control
Date: January 2009
@@ -85,7 +84,7 @@ Description:
The /sys/devices/.../wakeup_count attribute contains the number
of signaled wakeup events associated with the device. This
attribute is read-only. If the device is not enabled to wake up
- the system from sleep states, this attribute is empty.
+ the system from sleep states, this attribute is not present.
What: /sys/devices/.../power/wakeup_active_count
Date: September 2010
@@ -95,7 +94,7 @@ Description:
number of times the processing of wakeup events associated with
the device was completed (at the kernel level). This attribute
is read-only. If the device is not enabled to wake up the
- system from sleep states, this attribute is empty.
+ system from sleep states, this attribute is not present.
What: /sys/devices/.../power/wakeup_hit_count
Date: September 2010
@@ -105,7 +104,8 @@ Description:
number of times the processing of a wakeup event associated with
the device might prevent the system from entering a sleep state.
This attribute is read-only. If the device is not enabled to
- wake up the system from sleep states, this attribute is empty.
+ wake up the system from sleep states, this attribute is not
+ present.
What: /sys/devices/.../power/wakeup_active
Date: September 2010
@@ -115,7 +115,7 @@ Description:
or 0, depending on whether or not a wakeup event associated with
the device is being processed (1). This attribute is read-only.
If the device is not enabled to wake up the system from sleep
- states, this attribute is empty.
+ states, this attribute is not present.
What: /sys/devices/.../power/wakeup_total_time_ms
Date: September 2010
@@ -125,7 +125,7 @@ Description:
the total time of processing wakeup events associated with the
device, in milliseconds. This attribute is read-only. If the
device is not enabled to wake up the system from sleep states,
- this attribute is empty.
+ this attribute is not present.
What: /sys/devices/.../power/wakeup_max_time_ms
Date: September 2010
@@ -135,7 +135,7 @@ Description:
the maximum time of processing a single wakeup event associated
with the device, in milliseconds. This attribute is read-only.
If the device is not enabled to wake up the system from sleep
- states, this attribute is empty.
+ states, this attribute is not present.
What: /sys/devices/.../power/wakeup_last_time_ms
Date: September 2010
@@ -146,7 +146,7 @@ Description:
signaling the last wakeup event associated with the device, in
milliseconds. This attribute is read-only. If the device is
not enabled to wake up the system from sleep states, this
- attribute is empty.
+ attribute is not present.
What: /sys/devices/.../power/autosuspend_delay_ms
Date: September 2010
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 5e87ad58c0b5..f51f28531b8d 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -82,6 +82,11 @@
</sect1>
</chapter>
+ <chapter id="fs_events">
+ <title>Events based on file descriptors</title>
+!Efs/eventfd.c
+ </chapter>
+
<chapter id="sysfs">
<title>The Filesystem for Exporting Kernel Objects</title>
!Efs/sysfs/file.c
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index cfaac34c4557..6ef692667e2f 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -849,6 +849,37 @@ All: lockdep-checked RCU-protected pointer access
See the comment headers in the source code (or the docbook generated
from them) for more information.
+However, given that there are no fewer than four families of RCU APIs
+in the Linux kernel, how do you choose which one to use? The following
+list can be helpful:
+
+a. Will readers need to block? If so, you need SRCU.
+
+b. What about the -rt patchset? If readers would need to block
+ in a non-rt kernel, you need SRCU. If readers would block
+ in a -rt kernel, but not in a non-rt kernel, SRCU is not
+ necessary.
+
+c. Do you need to treat NMI handlers, hardirq handlers,
+ and code segments with preemption disabled (whether
+ via preempt_disable(), local_irq_save(), local_bh_disable(),
+ or some other mechanism) as if they were explicit RCU readers?
+ If so, you need RCU-sched.
+
+d. Do you need RCU grace periods to complete even in the face
+ of softirq monopolization of one or more of the CPUs? For
+ example, is your code subject to network-based denial-of-service
+ attacks? If so, you need RCU-bh.
+
+e. Is your workload too update-intensive for normal use of
+ RCU, but inappropriate for other synchronization mechanisms?
+ If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
+
+f. Otherwise, use RCU.
+
+Of course, this all assumes that you have determined that RCU is in fact
+the right tool for your job.
+
8. ANSWERS TO QUICK QUIZZES
diff --git a/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
new file mode 100644
index 000000000000..569b16248514
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ce4100-i2c.txt
@@ -0,0 +1,93 @@
+CE4100 I2C
+----------
+
+CE4100 has one PCI device which is described as the I2C-Controller. This
+PCI device has three PCI-bars, each bar contains a complete I2C
+controller. So we have a total of three independent I2C-Controllers
+which share only an interrupt line.
+The driver is probed via the PCI-ID and gathers information about the
+attached devices from the device tree.
+Grant Likely recommended to use the ranges property to map the PCI-Bar
+number to its physical address and to use this to find the child nodes
+of the specific I2C controller. These were his exact words:
+
+ Here's where the magic happens. Each entry in
+ ranges describes how the parent pci address space
+ (middle group of 3) is translated to the local
+ address space (first group of 2) and the size of
+ each range (last cell). In this particular case,
+ the first cell of the local address is chosen to be
+ 1:1 mapped to the BARs, and the second is the
+ offset from the base of the BAR (which would be
+ non-zero if you had 2 or more devices mapped off
+ the same BAR)
+
+ ranges allows the address mapping to be described
+ in a way that the OS can interpret without
+ requiring custom device driver code.
+
+This is an example which is used on FalconFalls:
+------------------------------------------------
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+
+ /* as described by Grant, the first number in the group of
+ * three is the bar number followed by the 64bit bar address
+ * followed by the size of the mapping. The bar address
+ * also requires a valid translation in the parent's ranges
+ * property.
+ */
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+
+ /* The first number in the reg property is the
+ * number of the bar
+ */
+ reg = <0 0 0x100>;
+
+ /* This I2C controller has no devices */
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ /* This I2C controller has one gpio controller */
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-cmos.txt b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
new file mode 100644
index 000000000000..7382989b3052
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
@@ -0,0 +1,28 @@
+ Motorola mc146818 compatible RTC
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Required properties:
+ - compatible : "motorola,mc146818"
+ - reg : should contain registers location and length.
+
+Optional properties:
+ - interrupts : should contain interrupt.
+ - interrupt-parent : interrupt source phandle.
+ - ctrl-reg : Contains the initial value of the control register also
+ called "Register B".
+ - freq-reg : Contains the initial value of the frequency register also
+ called "Register A".
+
+"Register A" and "B" are usually initialized by the firmware (BIOS for
+instance). If this is not done, it can be performed by the driver.
+
+ISA Example:
+
+ rtc@70 {
+ compatible = "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
diff --git a/Documentation/devicetree/bindings/x86/ce4100.txt b/Documentation/devicetree/bindings/x86/ce4100.txt
new file mode 100644
index 000000000000..b49ae593a60b
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/ce4100.txt
@@ -0,0 +1,38 @@
+CE4100 Device Tree Bindings
+---------------------------
+
+The CE4100 SoC uses the following compatible format for its in-core
+peripherals: <vendor>,<chip>-<device>.
+Many of the "generic" devices like HPET or IO APIC have the ce4100
+name in their compatible property because they first appeared in this
+SoC.
+
+The CPU node
+------------
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+
+The reg property describes the CPU number. The lapic property points to
+the local APIC timer.
+
+The SoC node
+------------
+
+This node describes the in-core peripherals. Required property:
+ compatible = "intel,ce4100-cp";
+
+The PCI node
+------------
+This node describes the PCI bus on the SoC. Its property should be
+ compatible = "intel,ce4100-pci", "pci";
+
+If the OS is using the IO-APIC for interrupt routing then the reported
+interrupt numbers for devices are no longer true. In order to obtain the
+correct interrupt number, the child node which represents the device has
+to contain the interrupt property. Besides the interrupt property it has
+to contain at least the reg property containing the PCI bus address and
+compatible property according to "PCI Bus Binding Revision 2.1".
diff --git a/Documentation/devicetree/bindings/x86/interrupt.txt b/Documentation/devicetree/bindings/x86/interrupt.txt
new file mode 100644
index 000000000000..7d19f494f19a
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/interrupt.txt
@@ -0,0 +1,26 @@
+Interrupt chips
+---------------
+
+* Intel I/O Advanced Programmable Interrupt Controller (IO APIC)
+
+ Required properties:
+ --------------------
+ compatible = "intel,ce4100-ioapic";
+ #interrupt-cells = <2>;
+
+ Device's interrupt property:
+
+ interrupts = <P S>;
+
+ The first number (P) represents the interrupt pin which is wired to the
+ IO APIC. The second number (S) represents the sense of interrupt which
+ should be configured and can be one of:
+ 0 - Edge Rising
+ 1 - Level Low
+ 2 - Level High
+ 3 - Edge Falling
+
+* Local APIC
+ Required property:
+
+ compatible = "intel,ce4100-lapic";
diff --git a/Documentation/devicetree/bindings/x86/timer.txt b/Documentation/devicetree/bindings/x86/timer.txt
new file mode 100644
index 000000000000..c688af58e3bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/timer.txt
@@ -0,0 +1,6 @@
+Timers
+------
+
+* High Precision Event Timer (HPET)
+ Required property:
+ compatible = "intel,ce4100-hpet";
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
index 28b1c9d3d351..55fd2623445b 100644
--- a/Documentation/devicetree/booting-without-of.txt
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -13,6 +13,7 @@ Table of Contents
I - Introduction
1) Entry point for arch/powerpc
+ 2) Entry point for arch/x86
II - The DT block format
1) Header
@@ -225,6 +226,25 @@ it with special cases.
cannot support both configurations with Book E and configurations
with classic Powerpc architectures.
+2) Entry point for arch/x86
+-------------------------------
+
+ There is one single 32bit entry point to the kernel at code32_start,
+ the decompressor (the real mode entry point goes to the same 32bit
+ entry point once it switched into protected mode). That entry point
+ supports one calling convention which is documented in
+ Documentation/x86/boot.txt
+ The physical pointer to the device-tree block (defined in chapter II)
+ is passed via setup_data which requires at least boot protocol 2.09.
+ The type field is defined as
+
+ #define SETUP_DTB 2
+
+ This device-tree is used as an extension to the "boot page". As such it
+ does not parse / consider data which is already covered by the boot
+ page. This includes memory size, reserved ranges, command line arguments
+ or initrd address. It simply holds information which can not be retrieved
+ otherwise like interrupt routing or a list of devices behind an I2C bus.
II - The DT block format
========================
diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42
index 0e76ef12e4c6..a22ecf48f255 100644
--- a/Documentation/hwmon/jc42
+++ b/Documentation/hwmon/jc42
@@ -51,7 +51,8 @@ Supported chips:
* JEDEC JC 42.4 compliant temperature sensor chips
Prefix: 'jc42'
Addresses scanned: I2C 0x18 - 0x1f
- Datasheet: -
+ Datasheet:
+ http://www.jedec.org/sites/default/files/docs/4_01_04R19.pdf
Author:
Guenter Roeck <guenter.roeck@ericsson.com>
@@ -60,7 +61,11 @@ Author:
Description
-----------
-This driver implements support for JEDEC JC 42.4 compliant temperature sensors.
+This driver implements support for JEDEC JC 42.4 compliant temperature sensors,
+which are used on many DDR3 memory modules for mobile devices and servers. Some
+systems use the sensor to prevent memory overheating by automatically throttling
+the memory controller.
+
The driver auto-detects the chips listed above, but can be manually instantiated
to support other JC 42.4 compliant chips.
@@ -81,15 +86,19 @@ limits. The chip supports only a single register to configure the hysteresis,
which applies to all limits. This register can be written by writing into
temp1_crit_hyst. Other hysteresis attributes are read-only.
+If the BIOS has configured the sensor for automatic temperature management, it
+is likely that it has locked the registers, i.e., that the temperature limits
+cannot be changed.
+
Sysfs entries
-------------
temp1_input Temperature (RO)
-temp1_min Minimum temperature (RW)
-temp1_max Maximum temperature (RW)
-temp1_crit Critical high temperature (RW)
+temp1_min Minimum temperature (RO or RW)
+temp1_max Maximum temperature (RO or RW)
+temp1_crit Critical high temperature (RO or RW)
-temp1_crit_hyst Critical hysteresis temperature (RW)
+temp1_crit_hyst Critical hysteresis temperature (RO or RW)
temp1_max_hyst Maximum hysteresis temperature (RO)
temp1_min_alarm Temperature low alarm
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index 6526eee525a6..d2b56a4fd1f5 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -9,6 +9,8 @@ Supported chips:
Socket S1G3: Athlon II, Sempron, Turion II
* AMD Family 11h processors:
Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
+* AMD Family 12h processors: "Llano"
+* AMD Family 14h processors: "Brazos" (C/E/G-Series)
Prefix: 'k10temp'
Addresses scanned: PCI space
@@ -17,10 +19,14 @@ Supported chips:
http://support.amd.com/us/Processor_TechDocs/31116.pdf
BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41256.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
+ http://support.amd.com/us/Processor_TechDocs/43170.pdf
Revision Guide for AMD Family 10h Processors:
http://support.amd.com/us/Processor_TechDocs/41322.pdf
Revision Guide for AMD Family 11h Processors:
http://support.amd.com/us/Processor_TechDocs/41788.pdf
+ Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
+ http://support.amd.com/us/Processor_TechDocs/47534.pdf
AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
http://support.amd.com/us/Processor_TechDocs/43373.pdf
AMD Family 10h Server and Workstation Processor Power and Thermal Data Sheet:
@@ -34,7 +40,7 @@ Description
-----------
This driver permits reading of the internal temperature sensor of AMD
-Family 10h and 11h processors.
+Family 10h/11h/12h/14h processors.
All these processors have a sensor, but on those for Socket F or AM2+,
the sensor may return inconsistent values (erratum 319). The driver
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 89835a4766a6..738c6fda3fb0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -144,6 +144,11 @@ a fixed number of characters. This limit depends on the architecture
and is between 256 and 4096 characters. It is defined in the file
./include/asm/setup.h as COMMAND_LINE_SIZE.
+Finally, the [KMG] suffix is commonly described after a number of kernel
+parameter values. These 'K', 'M', and 'G' letters represent the _binary_
+multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
+bytes respectively. Such letter suffixes can also be entirely omitted.
+
acpi= [HW,ACPI,X86]
Advanced Configuration and Power Interface
@@ -545,16 +550,20 @@ and is between 256 and 4096 characters. It is defined in the file
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
- crashkernel=nn[KMG]@ss[KMG]
- [KNL] Reserve a chunk of physical memory to
- hold a kernel to switch to with kexec on panic.
+ crashkernel=size[KMG][@offset[KMG]]
+ [KNL] Using kexec, Linux can switch to a 'crash kernel'
+ upon panic. This parameter reserves the physical
+ memory region [offset, offset + size] for that kernel
+ image. If '@offset' is omitted, then a suitable offset
+ is selected automatically. Check
+ Documentation/kdump/kdump.txt for further details.
crashkernel=range1:size1[,range2:size2,...][@offset]
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
start-[end] where start and end are both
a memory unit (amount[KMG]). See also
- Documentation/kdump/kdump.txt for a example.
+ Documentation/kdump/kdump.txt for an example.
cs89x0_dma= [HW,NET]
Format: <dma>
@@ -1262,10 +1271,9 @@ and is between 256 and 4096 characters. It is defined in the file
6 (KERN_INFO) informational
7 (KERN_DEBUG) debug-level messages
- log_buf_len=n Sets the size of the printk ring buffer, in bytes.
- Format: { n | nk | nM }
- n must be a power of two. The default size
- is set in the kernel config file.
+ log_buf_len=n[KMG] Sets the size of the printk ring buffer,
+ in bytes. n must be a power of two. The default
+ size is set in the kernel config file.
logo.nologo [FB] Disables display of the built-in Linux logo.
This may be used to provide more screen space for
@@ -2436,6 +2444,10 @@ and is between 256 and 4096 characters. It is defined in the file
<deci-seconds>: poll all this frequency
0: no polling (default)
+ threadirqs [KNL]
+ Force threading of all interrupt handlers except those
+ marked explicitly IRQF_NO_THREAD.
+
topology= [S390]
Format: {off | on}
Specify if the kernel should make use of the cpu
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 09b55e461740..69686ad12c66 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -127,14 +127,15 @@ This is because process A's keyrings can't simply be attached to
of them, and (b) it requires the same UID/GID/Groups all the way through.
-======================
-NEGATIVE INSTANTIATION
-======================
+====================================
+NEGATIVE INSTANTIATION AND REJECTION
+====================================
Rather than instantiating a key, it is possible for the possessor of an
authorisation key to negatively instantiate a key that's under construction.
This is a short duration placeholder that causes any attempt at re-requesting
-the key whilst it exists to fail with error ENOKEY.
+the key whilst it exists to fail with error ENOKEY if negated or the specified
+error if rejected.
This is provided to prevent excessive repeated spawning of /sbin/request-key
processes for a key that will never be obtainable.
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index e4dbbdb1bd96..6523a9e6f293 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -637,6 +637,9 @@ The keyctl syscall functions are:
long keyctl(KEYCTL_INSTANTIATE, key_serial_t key,
const void *payload, size_t plen,
key_serial_t keyring);
+ long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key,
+ const struct iovec *payload_iov, unsigned ioc,
+ key_serial_t keyring);
If the kernel calls back to userspace to complete the instantiation of a
key, userspace should use this call to supply data for the key before the
@@ -652,11 +655,16 @@ The keyctl syscall functions are:
The payload and plen arguments describe the payload data as for add_key().
+ The payload_iov and ioc arguments describe the payload data in an iovec
+ array instead of a single buffer.
+
(*) Negatively instantiate a partially constructed key.
long keyctl(KEYCTL_NEGATE, key_serial_t key,
unsigned timeout, key_serial_t keyring);
+ long keyctl(KEYCTL_REJECT, key_serial_t key,
+ unsigned timeout, unsigned error, key_serial_t keyring);
If the kernel calls back to userspace to complete the instantiation of a
key, userspace should use this call mark the key as negative before the
@@ -669,6 +677,10 @@ The keyctl syscall functions are:
that keyring, however all the constraints applying in KEYCTL_LINK apply in
this case too.
+ If the key is rejected, future searches for it will return the specified
+ error code until the rejected key expires. Negating the key is the same
+ as rejecting the key with ENOKEY as the error code.
+
(*) Set the default request-key destination keyring.
@@ -1062,6 +1074,13 @@ The structure has a number of fields, some of which are mandatory:
viable.
+ (*) int (*vet_description)(const char *description);
+
+ This optional method is called to vet a key description. If the key type
+ doesn't approve of the key description, it may return an error, otherwise
+ it should return 0.
+
+
(*) int (*instantiate)(struct key *key, const void *data, size_t datalen);
This method is called to attach a payload to a key during construction.
@@ -1231,10 +1250,11 @@ hand the request off to (perhaps a path held in placed in another key by, for
example, the KDE desktop manager).
The program (or whatever it calls) should finish construction of the key by
-calling KEYCTL_INSTANTIATE, which also permits it to cache the key in one of
-the keyrings (probably the session ring) before returning. Alternatively, the
-key can be marked as negative with KEYCTL_NEGATE; this also permits the key to
-be cached in one of the keyrings.
+calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
+cache the key in one of the keyrings (probably the session ring) before
+returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE
+or KEYCTL_REJECT; this also permits the key to be cached in one of the
+keyrings.
If it returns with the key remaining in the unconstructed state, the key will
be marked as being negative, it will be added to the session keyring, and an
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 631ad2f1b229..f0d3a8026a56 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -21,6 +21,7 @@ Contents:
- SMP barrier pairing.
- Examples of memory barrier sequences.
- Read memory barriers vs load speculation.
+ - Transitivity
(*) Explicit kernel barriers.
@@ -959,6 +960,63 @@ the speculation will be cancelled and the value reloaded:
retrieved : : +-------+
+TRANSITIVITY
+------------
+
+Transitivity is a deeply intuitive notion about ordering that is not
+always provided by real computer systems. The following example
+demonstrates transitivity (also called "cumulativity"):
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <general barrier> <general barrier>
+ LOAD Y LOAD X
+
+Suppose that CPU 2's load from X returns 1 and its load from Y returns 0.
+This indicates that CPU 2's load from X in some sense follows CPU 1's
+store to X and that CPU 2's load from Y in some sense preceded CPU 3's
+store to Y. The question is then "Can CPU 3's load from X return 0?"
+
+Because CPU 2's load from X in some sense came after CPU 1's store, it
+is natural to expect that CPU 3's load from X must therefore return 1.
+This expectation is an example of transitivity: if a load executing on
+CPU A follows a load from the same variable executing on CPU B, then
+CPU A's load must either return the same value that CPU B's load did,
+or must return some later value.
+
+In the Linux kernel, use of general memory barriers guarantees
+transitivity. Therefore, in the above example, if CPU 2's load from X
+returns 1 and its load from Y returns 0, then CPU 3's load from X must
+also return 1.
+
+However, transitivity is -not- guaranteed for read or write barriers.
+For example, suppose that CPU 2's general barrier in the above example
+is changed to a read barrier as shown below:
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <read barrier> <general barrier>
+ LOAD Y LOAD X
+
+This substitution destroys transitivity: in this example, it is perfectly
+legal for CPU 2's load from X to return 1, its load from Y to return 0,
+and CPU 3's load from X to return 0.
+
+The key point is that although CPU 2's read barrier orders its pair
+of loads, it does not guarantee to order CPU 1's store. Therefore, if
+this example runs on a system where CPUs 1 and 2 share a store buffer
+or a level of cache, CPU 2 might have early access to CPU 1's writes.
+General barriers are therefore required to ensure that all CPUs agree
+on the combined order of CPU 1's and CPU 2's accesses.
+
+To reiterate, if your code requires transitivity, use general barriers
+throughout.
+
+
========================
EXPLICIT KERNEL BARRIERS
========================
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index fe5c099b8fc8..4edd78dfb362 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -40,8 +40,6 @@ decnet.txt
- info on using the DECnet networking layer in Linux.
depca.txt
- the Digital DEPCA/EtherWORKS DE1?? and DE2?? LANCE Ethernet driver
-dgrs.txt
- - the Digi International RightSwitch SE-X Ethernet driver
dmfe.txt
- info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
e100.txt
@@ -50,8 +48,6 @@ e1000.txt
- info on Intel's E1000 line of gigabit ethernet boards
eql.txt
- serial IP load balancing
-ethertap.txt
- - the Ethertap user space packet reception and transmission driver
ewrk3.txt
- the Digital EtherWORKS 3 DE203/4/5 Ethernet driver
filter.txt
@@ -104,8 +100,6 @@ tuntap.txt
- TUN/TAP device driver, allowing user space Rx/Tx of packets.
vortex.txt
- info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
-wavelan.txt
- - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver
x25.txt
- general info on X.25 development.
x25-iface.txt
diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile
index 5aba7a33aeeb..24c308dd3fd1 100644
--- a/Documentation/networking/Makefile
+++ b/Documentation/networking/Makefile
@@ -4,6 +4,8 @@ obj- := dummy.o
# List of programs to build
hostprogs-y := ifenslave
+HOSTCFLAGS_ifenslave.o += -I$(objtree)/usr/include
+
# Tell kbuild to always build the programs
always := $(hostprogs-y)
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
index aefd1e681804..04ca06325b08 100644
--- a/Documentation/networking/dns_resolver.txt
+++ b/Documentation/networking/dns_resolver.txt
@@ -61,7 +61,6 @@ before the more general line given above as the first match is the one taken.
create dns_resolver foo:* * /usr/sbin/dns.foo %k
-
=====
USAGE
=====
@@ -104,6 +103,14 @@ implemented in the module can be called after doing:
returned also.
+===============================
+READING DNS KEYS FROM USERSPACE
+===============================
+
+Keys of dns_resolver type can be read from userspace using keyctl_read() or
+"keyctl read/print/pipe".
+
+
=========
MECHANISM
=========
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 57080cd74575..f023ba6bba62 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -1,6 +1,6 @@
Device Power Management
-Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
@@ -159,18 +159,18 @@ matter, and the kernel is responsible for keeping track of it. By contrast,
whether or not a wakeup-capable device should issue wakeup events is a policy
decision, and it is managed by user space through a sysfs attribute: the
power/wakeup file. User space can write the strings "enabled" or "disabled" to
-set or clear the should_wakeup flag, respectively. Reads from the file will
-return the corresponding string if can_wakeup is true, but if can_wakeup is
-false then reads will return an empty string, to indicate that the device
-doesn't support wakeup events. (But even though the file appears empty, writes
-will still affect the should_wakeup flag.)
+set or clear the "should_wakeup" flag, respectively. This file is only present
+for wakeup-capable devices (i.e. devices whose "can_wakeup" flags are set)
+and is created (or removed) by device_set_wakeup_capable(). Reads from the
+file will return the corresponding string.
The device_may_wakeup() routine returns true only if both flags are set.
-Drivers should check this routine when putting devices in a low-power state
-during a system sleep transition, to see whether or not to enable the devices'
-wakeup mechanisms. However for runtime power management, wakeup events should
-be enabled whenever the device and driver both support them, regardless of the
-should_wakeup flag.
+This information is used by subsystems, like the PCI bus type code, to see
+whether or not to enable the devices' wakeup mechanisms. If device wakeup
+mechanisms are enabled or disabled directly by drivers, they also should use
+device_may_wakeup() to decide what to do during a system sleep transition.
+However for runtime power management, wakeup events should be enabled whenever
+the device and driver both support them, regardless of the should_wakeup flag.
/sys/devices/.../power/control files
@@ -249,23 +249,18 @@ various phases always run after tasks have been frozen and before they are
unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have
been disabled (except for those marked with the IRQ_WAKEUP flag).
-Most phases use bus, type, and class callbacks (that is, methods defined in
-dev->bus->pm, dev->type->pm, and dev->class->pm). The prepare and complete
-phases are exceptions; they use only bus callbacks. When multiple callbacks
-are used in a phase, they are invoked in the order: <class, type, bus> during
-power-down transitions and in the opposite order during power-up transitions.
-For example, during the suspend phase the PM core invokes
-
- dev->class->pm.suspend(dev);
- dev->type->pm.suspend(dev);
- dev->bus->pm.suspend(dev);
-
-before moving on to the next device, whereas during the resume phase the core
-invokes
-
- dev->bus->pm.resume(dev);
- dev->type->pm.resume(dev);
- dev->class->pm.resume(dev);
+All phases use bus, type, or class callbacks (that is, methods defined in
+dev->bus->pm, dev->type->pm, or dev->class->pm). These callbacks are mutually
+exclusive, so if the device type provides a struct dev_pm_ops object pointed to
+by its pm field (i.e. both dev->type and dev->type->pm are defined), the
+callbacks included in that object (i.e. dev->type->pm) will be used. Otherwise,
+if the class provides a struct dev_pm_ops object pointed to by its pm field
+(i.e. both dev->class and dev->class->pm are defined), the PM core will use the
+callbacks from that object (i.e. dev->class->pm). Finally, if the pm fields of
+both the device type and class objects are NULL (or those objects do not exist),
+the callbacks provided by the bus (that is, the callbacks from dev->bus->pm)
+will be used (this allows device types to override callbacks provided by bus
+types or classes if necessary).
These callbacks may in turn invoke device- or driver-specific methods stored in
dev->driver->pm, but they don't have to.
@@ -507,6 +502,49 @@ routines. Nevertheless, different callback pointers are used in case there is a
situation where it actually matters.
+Device Power Domains
+--------------------
+Sometimes devices share reference clocks or other power resources. In those
+cases it generally is not possible to put devices into low-power states
+individually. Instead, a set of devices sharing a power resource can be put
+into a low-power state together at the same time by turning off the shared
+power resource. Of course, they also need to be put into the full-power state
+together, by turning the shared power resource on. A set of devices with this
+property is often referred to as a power domain.
+
+Support for power domains is provided through the pwr_domain field of struct
+device. This field is a pointer to an object of type struct dev_power_domain,
+defined in include/linux/pm.h, providing a set of power management callbacks
+analogous to the subsystem-level and device driver callbacks that are executed
+for the given device during all power transitions, in addition to the respective
+subsystem-level callbacks. Specifically, the power domain "suspend" callbacks
+(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
+executed after the analogous subsystem-level callbacks, while the power domain
+"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
+etc.) are executed before the analogous subsystem-level callbacks. Error codes
+returned by the "suspend" and "resume" power domain callbacks are ignored.
+
+Power domain ->runtime_idle() callback is executed before the subsystem-level
+->runtime_idle() callback and the result returned by it is not ignored. Namely,
+if it returns an error code, the subsystem-level ->runtime_idle() callback will
+not be called and the helper function rpm_idle() executing it will return an
+error code. This mechanism is intended to help platforms where saving device state
+is a time consuming operation and should only be carried out if all devices
+in the power domain are idle, before turning off the shared power resource(s).
+Namely, the power domain ->runtime_idle() callback may return error code until
+the pm_runtime_idle() helper (or its asynchronous version) has been called for
+all devices in the power domain (it is recommended that the returned error code
+be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
+callback from being run prematurely.
+
+The support for device power domains is only relevant to platforms needing to
+use the same subsystem-level (e.g. platform bus type) and device driver power
+management callbacks in many different power domain configurations and wanting
+to avoid incorporating the support for power domains into the subsystem-level
+callbacks. The other platforms need not implement it or take it into account
+in any way.
+
+
System Devices
--------------
System devices (sysdevs) follow a slightly different API, which can be found in
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index ffe55ffa540a..654097b130b4 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -1,6 +1,6 @@
Run-time Power Management Framework for I/O Devices
-(C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
(C) 2010 Alan Stern <stern@rowland.harvard.edu>
1. Introduction
@@ -44,11 +44,12 @@ struct dev_pm_ops {
};
The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks are
-executed by the PM core for either the bus type, or device type (if the bus
-type's callback is not defined), or device class (if the bus type's and device
-type's callbacks are not defined) of given device. The bus type, device type
-and device class callbacks are referred to as subsystem-level callbacks in what
-follows.
+executed by the PM core for either the device type, or the class (if the device
+type's struct dev_pm_ops object does not exist), or the bus type (if the
+device type's and class' struct dev_pm_ops objects do not exist) of the given
+device (this allows device types to override callbacks provided by bus types or
+classes if necessary). The bus type, device type and class callbacks are
+referred to as subsystem-level callbacks in what follows.
By default, the callbacks are always invoked in process context with interrupts
enabled. However, subsystems can use the pm_runtime_irq_safe() helper function
diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt
index 34800cc521bf..4416b28630df 100644
--- a/Documentation/power/states.txt
+++ b/Documentation/power/states.txt
@@ -62,12 +62,12 @@ setup via another operating system for it to use. Despite the
inconvenience, this method requires minimal work by the kernel, since
the firmware will also handle restoring memory contents on resume.
-For suspend-to-disk, a mechanism called swsusp called 'swsusp' (Swap
-Suspend) is used to write memory contents to free swap space.
-swsusp has some restrictive requirements, but should work in most
-cases. Some, albeit outdated, documentation can be found in
-Documentation/power/swsusp.txt. Alternatively, userspace can do most
-of the actual suspend to disk work, see userland-swsusp.txt.
+For suspend-to-disk, a mechanism called 'swsusp' (Swap Suspend) is used
+to write memory contents to free swap space. swsusp has some restrictive
+requirements, but should work in most cases. Some, albeit outdated,
+documentation can be found in Documentation/power/swsusp.txt.
+Alternatively, userspace can do most of the actual suspend to disk work,
+see userland-swsusp.txt.
Once memory state is written to disk, the system may either enter a
low-power state (like ACPI S4), or it may simply power down. Powering
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 9104c1062084..250160469d83 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -178,38 +178,29 @@ RTC class framework, but can't be supported by the older driver.
setting the longer alarm time and enabling its IRQ using a single
request (using the same model as EFI firmware).
- * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, it probably
- also offers update IRQs whenever the "seconds" counter changes.
- If needed, the RTC framework can emulate this mechanism.
+ * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, the RTC framework
+ will emulate this mechanism.
- * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... another
- feature often accessible with an IRQ line is a periodic IRQ, issued
- at settable frequencies (usually 2^N Hz).
+ * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... these ioctls
+ are emulated via a kernel hrtimer.
In many cases, the RTC alarm can be a system wake event, used to force
Linux out of a low power sleep state (or hibernation) back to a fully
operational state. For example, a system could enter a deep power saving
state until it's time to execute some scheduled tasks.
-Note that many of these ioctls need not actually be implemented by your
-driver. The common rtc-dev interface handles many of these nicely if your
-driver returns ENOIOCTLCMD. Some common examples:
+Note that many of these ioctls are handled by the common rtc-dev interface.
+Some common examples:
* RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be
called with appropriate values.
- * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: the
- set_alarm/read_alarm functions will be called.
+ * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: gets or sets
+ the alarm rtc_timer. May call the set_alarm driver function.
- * RTC_IRQP_SET, RTC_IRQP_READ: the irq_set_freq function will be called
- to set the frequency while the framework will handle the read for you
- since the frequency is stored in the irq_freq member of the rtc_device
- structure. Your driver needs to initialize the irq_freq member during
- init. Make sure you check the requested frequency is in range of your
- hardware in the irq_set_freq function. If it isn't, return -EINVAL. If
- you cannot actually change the frequency, do not define irq_set_freq.
+ * RTC_IRQP_SET, RTC_IRQP_READ: These are emulated by the generic code.
- * RTC_PIE_ON, RTC_PIE_OFF: the irq_set_state function will be called.
+ * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
If all else fails, check out the rtc-test.c driver!
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
The routines look the same as above:
- rwlock_t xxx_lock = RW_LOCK_UNLOCKED;
+ rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
unsigned long flags;
@@ -196,25 +196,3 @@ appropriate:
For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
-
-SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
-with lockdep state tracking.
-
-Most of the time, you can simply turn:
- static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
-into:
- static DEFINE_SPINLOCK(xxx_lock);
-
-Static structure member variables go from:
-
- struct foo bar {
- .lock = SPIN_LOCK_UNLOCKED;
- };
-
-to:
-
- struct foo bar {
- .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
- };
-
-Declaration of static rw_locks undergo a similar transformation.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dc52bd442c92..79fcafc7fd64 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -247,6 +247,13 @@ You need very few things to get the syscalls tracing in an arch.
- Support the TIF_SYSCALL_TRACEPOINT thread flags.
- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
in the ptrace syscalls tracing path.
+- If the system call table on this arch is more complicated than a simple array
+ of addresses of the system calls, implement an arch_syscall_addr to return
+ the address of a given system call.
+- If the symbol names of the system calls do not match the function names on
+ this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and
+ implement arch_syscall_match_sym_name with the appropriate logic to return
+ true if the function name corresponds with the symbol name.
- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 557c1edeccaf..1ebc24cf9a55 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -80,11 +80,11 @@ of ftrace. Here is a list of some of the key files:
tracers listed here can be configured by
echoing their name into current_tracer.
- tracing_enabled:
+ tracing_on:
- This sets or displays whether the current_tracer
- is activated and tracing or not. Echo 0 into this
- file to disable the tracer or 1 to enable it.
+ This sets or displays whether writing to the trace
+ ring buffer is enabled. Echo 0 into this file to disable
+ the tracer or 1 to enable it.
trace:
@@ -202,10 +202,6 @@ Here is the list of current tracers that may be configured.
to draw a graph of function calls similar to C code
source.
- "sched_switch"
-
- Traces the context switches and wakeups between tasks.
-
"irqsoff"
Traces the areas that disable interrupts and saves
@@ -273,39 +269,6 @@ format, the function name that was traced "path_put" and the
parent function that called this function "path_walk". The
timestamp is the time at which the function was entered.
-The sched_switch tracer also includes tracing of task wakeups
-and context switches.
-
- ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S
- ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S
- ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R
- events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R
- kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R
- ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R
-
-Wake ups are represented by a "+" and the context switches are
-shown as "==>". The format is:
-
- Context switches:
-
- Previous task Next Task
-
- <pid>:<prio>:<state> ==> <pid>:<prio>:<state>
-
- Wake ups:
-
- Current task Task waking up
-
- <pid>:<prio>:<state> + <pid>:<prio>:<state>
-
-The prio is the internal kernel priority, which is the inverse
-of the priority that is usually displayed by user-space tools.
-Zero represents the highest priority (99). Prio 100 starts the
-"nice" priorities with 100 being equal to nice -20 and 139 being
-nice 19. The prio "140" is reserved for the idle task which is
-the lowest priority thread (pid 0).
-
-
Latency trace format
--------------------
@@ -491,78 +454,10 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
latencies, as described in "Latency
trace format".
-sched_switch
-------------
-
-This tracer simply records schedule switches. Here is an example
-of how to use it.
-
- # echo sched_switch > current_tracer
- # echo 1 > tracing_enabled
- # sleep 1
- # echo 0 > tracing_enabled
- # cat trace
-
-# tracer: sched_switch
-#
-# TASK-PID CPU# TIMESTAMP FUNCTION
-# | | | | |
- bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R
- bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R
- sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R
- bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S
- bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R
- sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R
- bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D
- bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R
- <idle>-0 [00] 240.132589: 0:140:R + 4:115:S
- <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R
- ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R
- <idle>-0 [00] 240.132598: 0:140:R + 4:115:S
- <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R
- ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R
- sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R
- [...]
-
-
-As we have discussed previously about this format, the header
-shows the name of the trace and points to the options. The
-"FUNCTION" is a misnomer since here it represents the wake ups
-and context switches.
-
-The sched_switch file only lists the wake ups (represented with
-'+') and context switches ('==>') with the previous task or
-current task first followed by the next task or task waking up.
-The format for both of these is PID:KERNEL-PRIO:TASK-STATE.
-Remember that the KERNEL-PRIO is the inverse of the actual
-priority with zero (0) being the highest priority and the nice
-values starting at 100 (nice -20). Below is a quick chart to map
-the kernel priority to user land priorities.
-
- Kernel Space User Space
- ===============================================================
- 0(high) to 98(low) user RT priority 99(high) to 1(low)
- with SCHED_RR or SCHED_FIFO
- ---------------------------------------------------------------
- 99 sched_priority is not used in scheduling
- decisions(it must be specified as 0)
- ---------------------------------------------------------------
- 100(high) to 139(low) user nice -20(high) to 19(low)
- ---------------------------------------------------------------
- 140 idle task priority
- ---------------------------------------------------------------
-
-The task states are:
-
- R - running : wants to run, may not actually be running
- S - sleep : process is waiting to be woken up (handles signals)
- D - disk sleep (uninterruptible sleep) : process must be woken up
- (ignores signals)
- T - stopped : process suspended
- t - traced : process is being traced (with something like gdb)
- Z - zombie : process waiting to be cleaned up
- X - unknown
-
+ overwrite - This controls what happens when the trace buffer is
+ full. If "1" (default), the oldest events are
+ discarded and overwritten. If "0", then the newest
+ events are discarded.
ftrace_enabled
--------------
@@ -607,10 +502,10 @@ an example:
# echo irqsoff > current_tracer
# echo latency-format > trace_options
# echo 0 > tracing_max_latency
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# ls -ltr
[...]
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: irqsoff
#
@@ -715,10 +610,10 @@ is much like the irqsoff tracer.
# echo preemptoff > current_tracer
# echo latency-format > trace_options
# echo 0 > tracing_max_latency
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# ls -ltr
[...]
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: preemptoff
#
@@ -863,10 +758,10 @@ tracers.
# echo preemptirqsoff > current_tracer
# echo latency-format > trace_options
# echo 0 > tracing_max_latency
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# ls -ltr
[...]
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: preemptirqsoff
#
@@ -1026,9 +921,9 @@ Instead of performing an 'ls', we will run 'sleep 1' under
# echo wakeup > current_tracer
# echo latency-format > trace_options
# echo 0 > tracing_max_latency
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# chrt -f 5 sleep 1
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: wakeup
#
@@ -1140,9 +1035,9 @@ ftrace_enabled is set; otherwise this tracer is a nop.
# sysctl kernel.ftrace_enabled=1
# echo function > current_tracer
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# usleep 1
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: function
#
@@ -1180,7 +1075,7 @@ int trace_fd;
[...]
int main(int argc, char *argv[]) {
[...]
- trace_fd = open(tracing_file("tracing_enabled"), O_WRONLY);
+ trace_fd = open(tracing_file("tracing_on"), O_WRONLY);
[...]
if (condition_hit()) {
write(trace_fd, "0", 1);
@@ -1631,9 +1526,9 @@ If I am only interested in sys_nanosleep and hrtimer_interrupt:
# echo sys_nanosleep hrtimer_interrupt \
> set_ftrace_filter
# echo function > current_tracer
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# usleep 1
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: ftrace
#
@@ -1879,9 +1774,9 @@ different. The trace is live.
# echo function > current_tracer
# cat trace_pipe > /tmp/trace.out &
[1] 4153
- # echo 1 > tracing_enabled
+ # echo 1 > tracing_on
# usleep 1
- # echo 0 > tracing_enabled
+ # echo 0 > tracing_on
# cat trace
# tracer: function
#
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 5f77d94598dd..6d27ab8d6e9f 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,11 +42,25 @@ Synopsis of kprobe_events
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
- (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
+ (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
+ are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
+Types
+-----
+Several types are supported for fetch-args. Kprobe tracer will access memory
+by given type. Prefixes 's' and 'u' mean those types are signed and unsigned
+respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+String type is a special type, which fetches a "null-terminated" string from
+kernel space. This means it will fail and store NULL if the string container
+has been paged out.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-
+offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
Per-Probe Event Filtering
-------------------------
diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
index 996a27d9b8db..01c513fac40e 100644
--- a/Documentation/workqueue.txt
+++ b/Documentation/workqueue.txt
@@ -190,9 +190,9 @@ resources, scheduled and executed.
* Long running CPU intensive workloads which can be better
managed by the system scheduler.
- WQ_FREEZEABLE
+ WQ_FREEZABLE
- A freezeable wq participates in the freeze phase of the system
+ A freezable wq participates in the freeze phase of the system
suspend operations. Work items on the wq are drained and no
new work item starts execution until thawed.
diff --git a/MAINTAINERS b/MAINTAINERS
index 5dd6c751e6a6..2a2cddd2f88c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -557,6 +557,13 @@ S: Maintained
F: drivers/net/appletalk/
F: net/appletalk/
+ARASAN COMPACT FLASH PATA CONTROLLER
+M: Viresh Kumar <viresh.kumar@st.com>
+L: linux-ide@vger.kernel.org
+S: Maintained
+F: include/linux/pata_arasan_cf_data.h
+F: drivers/ata/pata_arasan_cf.c
+
ARC FRAMEBUFFER DRIVER
M: Jaya Kumar <jayalk@intworks.biz>
S: Maintained
@@ -885,7 +892,7 @@ S: Supported
ARM/QUALCOMM MSM MACHINE SUPPORT
M: David Brown <davidb@codeaurora.org>
-M: Daniel Walker <dwalker@codeaurora.org>
+M: Daniel Walker <dwalker@fifo99.com>
M: Bryan Huntsman <bryanh@codeaurora.org>
L: linux-arm-msm@vger.kernel.org
F: arch/arm/mach-msm/
@@ -1010,6 +1017,15 @@ L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-s5p*/
+ARM/SAMSUNG MOBILE MACHINE SUPPORT
+M: Kyungmin Park <kyungmin.park@samsung.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S: Maintained
+F: arch/arm/mach-s5pv210/mach-aquila.c
+F: arch/arm/mach-s5pv210/mach-goni.c
+F: arch/arm/mach-exynos4/mach-universal_c210.c
+F: arch/arm/mach-exynos4/mach-nuri.c
+
ARM/SAMSUNG S5P SERIES FIMC SUPPORT
M: Kyungmin Park <kyungmin.park@samsung.com>
M: Sylwester Nawrocki <s.nawrocki@samsung.com>
@@ -1467,6 +1483,7 @@ F: include/net/bluetooth/
BONDING DRIVER
M: Jay Vosburgh <fubar@us.ibm.com>
+M: Andy Gospodarek <andy@greyhouse.net>
L: netdev@vger.kernel.org
W: http://sourceforge.net/projects/bonding/
S: Supported
@@ -1692,6 +1709,13 @@ M: Andy Whitcroft <apw@canonical.com>
S: Supported
F: scripts/checkpatch.pl
+CHINESE DOCUMENTATION
+M: Harry Wei <harryxiyou@gmail.com>
+L: xiyoulinuxkernelgroup@googlegroups.com
+L: linux-kernel@zh-kernel.org (moderated for non-subscribers)
+S: Maintained
+F: Documentation/zh_CN/
+
CISCO VIC ETHERNET NIC DRIVER
M: Vasanthy Kolluri <vkolluri@cisco.com>
M: Roopa Prabhu <roprabhu@cisco.com>
@@ -2026,7 +2050,7 @@ F: Documentation/scsi/dc395x.txt
F: drivers/scsi/dc395x.*
DCCP PROTOCOL
-M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+M: Gerrit Renker <gerrit@erg.abdn.ac.uk>
L: dccp@vger.kernel.org
W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp
S: Maintained
@@ -2873,7 +2897,6 @@ M: Guenter Roeck <guenter.roeck@ericsson.com>
L: lm-sensors@lm-sensors.org
W: http://www.lm-sensors.org/
T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
-T: quilt kernel.org/pub/linux/kernel/people/groeck/linux-staging/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
S: Maintained
F: Documentation/hwmon/
@@ -3513,7 +3536,7 @@ F: drivers/hwmon/jc42.c
F: Documentation/hwmon/jc42
JFS FILESYSTEM
-M: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
+M: Dave Kleikamp <shaggy@kernel.org>
L: jfs-discussion@lists.sourceforge.net
W: http://jfs.sourceforge.net/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shaggy/jfs-2.6.git
@@ -4276,10 +4299,7 @@ S: Maintained
F: net/sched/sch_netem.c
NETERION 10GbE DRIVERS (s2io/vxge)
-M: Ramkrishna Vepa <ramkrishna.vepa@exar.com>
-M: Sivakumar Subramani <sivakumar.subramani@exar.com>
-M: Sreenivasa Honnur <sreenivasa.honnur@exar.com>
-M: Jon Mason <jon.mason@exar.com>
+M: Jon Mason <jdmason@kudzu.us>
L: netdev@vger.kernel.org
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
@@ -5165,6 +5185,7 @@ F: drivers/char/random.c
RAPIDIO SUBSYSTEM
M: Matt Porter <mporter@kernel.crashing.org>
+M: Alexandre Bounine <alexandre.bounine@idt.com>
S: Maintained
F: drivers/rapidio/
@@ -5267,7 +5288,7 @@ S: Maintained
F: drivers/net/wireless/rtl818x/rtl8180/
RTL8187 WIRELESS DRIVER
-M: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+M: Herton Ronaldo Krzesinski <herton@canonical.com>
M: Hin-Tak Leung <htl10@users.sourceforge.net>
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
@@ -6105,7 +6126,7 @@ S: Maintained
F: security/tomoyo/
TOPSTAR LAPTOP EXTRAS DRIVER
-M: Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+M: Herton Ronaldo Krzesinski <herton@canonical.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
F: drivers/platform/x86/topstar-laptop.c
diff --git a/Makefile b/Makefile
index 5e40aa2acbff..d6592b63c8cb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 38
-EXTRAVERSION = -rc5
+EXTRAVERSION =
NAME = Flesh-Eating Bats with Fangs
# *DOCUMENTATION*
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 47f63d480141..cc31bec2e316 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -11,6 +11,7 @@ config ALPHA
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_PROBE
select AUTO_IRQ_AFFINITY if SMP
+ select GENERIC_HARDIRQS_NO_DEPRECATED
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/fcntl.h b/arch/alpha/include/asm/fcntl.h
index 70145cbb21cb..1b71ca70c9f6 100644
--- a/arch/alpha/include/asm/fcntl.h
+++ b/arch/alpha/include/asm/fcntl.h
@@ -31,6 +31,8 @@
#define __O_SYNC 020000000
#define O_SYNC (__O_SYNC|O_DSYNC)
+#define O_PATH 040000000
+
#define F_GETLK 7
#define F_SETLK 8
#define F_SETLKW 9
diff --git a/arch/alpha/include/asm/futex.h b/arch/alpha/include/asm/futex.h
index 945de222ab91..e8a761aee088 100644
--- a/arch/alpha/include/asm/futex.h
+++ b/arch/alpha/include/asm/futex.h
@@ -29,7 +29,7 @@
: "r" (uaddr), "r"(oparg) \
: "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -81,21 +81,23 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev, cmp;
+ int ret = 0, cmp;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ (
__ASM_SMP_MB
- "1: ldl_l %0,0(%2)\n"
- " cmpeq %0,%3,%1\n"
- " beq %1,3f\n"
- " mov %4,%1\n"
- "2: stl_c %1,0(%2)\n"
- " beq %1,4f\n"
+ "1: ldl_l %1,0(%3)\n"
+ " cmpeq %1,%4,%2\n"
+ " beq %2,3f\n"
+ " mov %5,%2\n"
+ "2: stl_c %2,0(%3)\n"
+ " beq %2,4f\n"
"3: .subsection 2\n"
"4: br 1b\n"
" .previous\n"
@@ -105,11 +107,12 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .long 2b-.\n"
" lda $31,3b-2b(%0)\n"
" .previous\n"
- : "=&r"(prev), "=&r"(cmp)
+ : "+r"(ret), "=&r"(prev), "=&r"(cmp)
: "r"(uaddr), "r"((long)oldval), "r"(newval)
: "memory");
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
#ifdef __KERNEL__
#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
static inline void __down_read(struct rw_semaphore *sem)
{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index 9ab234f48dd8..a19d60082299 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -44,11 +44,16 @@ static char irq_user_affinity[NR_IRQS];
int irq_select_affinity(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc[irq];
+ struct irq_data *data = irq_get_irq_data(irq);
+ struct irq_chip *chip;
static int last_cpu;
int cpu = last_cpu + 1;
- if (!desc || !get_irq_desc_chip(desc)->set_affinity || irq_user_affinity[irq])
+ if (!data)
+ return 1;
+ chip = irq_data_get_irq_chip(data);
+
+ if (!chip->irq_set_affinity || irq_user_affinity[irq])
return 1;
while (!cpu_possible(cpu) ||
@@ -56,8 +61,8 @@ int irq_select_affinity(unsigned int irq)
cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
last_cpu = cpu;
- cpumask_copy(desc->affinity, cpumask_of(cpu));
- get_irq_desc_chip(desc)->set_affinity(irq, cpumask_of(cpu));
+ cpumask_copy(data->affinity, cpumask_of(cpu));
+ chip->irq_set_affinity(data, cpumask_of(cpu), false);
return 0;
}
#endif /* CONFIG_SMP */
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 2d0679b60939..411ca11d0a18 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -228,14 +228,9 @@ struct irqaction timer_irqaction = {
void __init
init_rtc_irq(void)
{
- struct irq_desc *desc = irq_to_desc(RTC_IRQ);
-
- if (desc) {
- desc->status |= IRQ_DISABLED;
- set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
- handle_simple_irq, "RTC");
- setup_irq(RTC_IRQ, &timer_irqaction);
- }
+ set_irq_chip_and_handler_name(RTC_IRQ, &no_irq_chip,
+ handle_simple_irq, "RTC");
+ setup_irq(RTC_IRQ, &timer_irqaction);
}
/* Dummy irqactions. */
diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c
index 956ea0ed1694..c7cc9813e45f 100644
--- a/arch/alpha/kernel/irq_i8259.c
+++ b/arch/alpha/kernel/irq_i8259.c
@@ -33,10 +33,10 @@ i8259_update_irq_hw(unsigned int irq, unsigned long mask)
}
inline void
-i8259a_enable_irq(unsigned int irq)
+i8259a_enable_irq(struct irq_data *d)
{
spin_lock(&i8259_irq_lock);
- i8259_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq));
+ i8259_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
spin_unlock(&i8259_irq_lock);
}
@@ -47,16 +47,18 @@ __i8259a_disable_irq(unsigned int irq)
}
void
-i8259a_disable_irq(unsigned int irq)
+i8259a_disable_irq(struct irq_data *d)
{
spin_lock(&i8259_irq_lock);
- __i8259a_disable_irq(irq);
+ __i8259a_disable_irq(d->irq);
spin_unlock(&i8259_irq_lock);
}
void
-i8259a_mask_and_ack_irq(unsigned int irq)
+i8259a_mask_and_ack_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
+
spin_lock(&i8259_irq_lock);
__i8259a_disable_irq(irq);
@@ -71,9 +73,9 @@ i8259a_mask_and_ack_irq(unsigned int irq)
struct irq_chip i8259a_irq_type = {
.name = "XT-PIC",
- .unmask = i8259a_enable_irq,
- .mask = i8259a_disable_irq,
- .mask_ack = i8259a_mask_and_ack_irq,
+ .irq_unmask = i8259a_enable_irq,
+ .irq_mask = i8259a_disable_irq,
+ .irq_mask_ack = i8259a_mask_and_ack_irq,
};
void __init
diff --git a/arch/alpha/kernel/irq_impl.h b/arch/alpha/kernel/irq_impl.h
index b63ccd7386f1..d507a234b05d 100644
--- a/arch/alpha/kernel/irq_impl.h
+++ b/arch/alpha/kernel/irq_impl.h
@@ -31,11 +31,9 @@ extern void init_rtc_irq(void);
extern void common_init_isa_dma(void);
-extern void i8259a_enable_irq(unsigned int);
-extern void i8259a_disable_irq(unsigned int);
-extern void i8259a_mask_and_ack_irq(unsigned int);
-extern unsigned int i8259a_startup_irq(unsigned int);
-extern void i8259a_end_irq(unsigned int);
+extern void i8259a_enable_irq(struct irq_data *d);
+extern void i8259a_disable_irq(struct irq_data *d);
+extern void i8259a_mask_and_ack_irq(struct irq_data *d);
extern struct irq_chip i8259a_irq_type;
extern void init_i8259a_irqs(void);
diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c
index 2863458c853e..b30227fa7f5f 100644
--- a/arch/alpha/kernel/irq_pyxis.c
+++ b/arch/alpha/kernel/irq_pyxis.c
@@ -29,21 +29,21 @@ pyxis_update_irq_hw(unsigned long mask)
}
static inline void
-pyxis_enable_irq(unsigned int irq)
+pyxis_enable_irq(struct irq_data *d)
{
- pyxis_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16));
+ pyxis_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
}
static void
-pyxis_disable_irq(unsigned int irq)
+pyxis_disable_irq(struct irq_data *d)
{
- pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16)));
+ pyxis_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
}
static void
-pyxis_mask_and_ack_irq(unsigned int irq)
+pyxis_mask_and_ack_irq(struct irq_data *d)
{
- unsigned long bit = 1UL << (irq - 16);
+ unsigned long bit = 1UL << (d->irq - 16);
unsigned long mask = cached_irq_mask &= ~bit;
/* Disable the interrupt. */
@@ -58,9 +58,9 @@ pyxis_mask_and_ack_irq(unsigned int irq)
static struct irq_chip pyxis_irq_type = {
.name = "PYXIS",
- .mask_ack = pyxis_mask_and_ack_irq,
- .mask = pyxis_disable_irq,
- .unmask = pyxis_enable_irq,
+ .irq_mask_ack = pyxis_mask_and_ack_irq,
+ .irq_mask = pyxis_disable_irq,
+ .irq_unmask = pyxis_enable_irq,
};
void
@@ -103,7 +103,7 @@ init_pyxis_irqs(unsigned long ignore_mask)
if ((ignore_mask >> i) & 1)
continue;
set_irq_chip_and_handler(i, &pyxis_irq_type, handle_level_irq);
- irq_to_desc(i)->status |= IRQ_LEVEL;
+ irq_set_status_flags(i, IRQ_LEVEL);
}
setup_irq(16+7, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c
index 0e57e828b413..82a47bba41c4 100644
--- a/arch/alpha/kernel/irq_srm.c
+++ b/arch/alpha/kernel/irq_srm.c
@@ -18,27 +18,27 @@
DEFINE_SPINLOCK(srm_irq_lock);
static inline void
-srm_enable_irq(unsigned int irq)
+srm_enable_irq(struct irq_data *d)
{
spin_lock(&srm_irq_lock);
- cserve_ena(irq - 16);
+ cserve_ena(d->irq - 16);
spin_unlock(&srm_irq_lock);
}
static void
-srm_disable_irq(unsigned int irq)
+srm_disable_irq(struct irq_data *d)
{
spin_lock(&srm_irq_lock);
- cserve_dis(irq - 16);
+ cserve_dis(d->irq - 16);
spin_unlock(&srm_irq_lock);
}
/* Handle interrupts from the SRM, assuming no additional weirdness. */
static struct irq_chip srm_irq_type = {
.name = "SRM",
- .unmask = srm_enable_irq,
- .mask = srm_disable_irq,
- .mask_ack = srm_disable_irq,
+ .irq_unmask = srm_enable_irq,
+ .irq_mask = srm_disable_irq,
+ .irq_mask_ack = srm_disable_irq,
};
void __init
@@ -52,7 +52,7 @@ init_srm_irqs(long max, unsigned long ignore_mask)
if (i < 64 && ((ignore_mask >> i) & 1))
continue;
set_irq_chip_and_handler(i, &srm_irq_type, handle_level_irq);
- irq_to_desc(i)->status |= IRQ_LEVEL;
+ irq_set_status_flags(i, IRQ_LEVEL);
}
}
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index fe698b5045e9..376f22130791 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -230,44 +230,24 @@ linux_to_osf_statfs(struct kstatfs *linux_stat, struct osf_statfs __user *osf_st
return copy_to_user(osf_stat, &tmp_stat, bufsiz) ? -EFAULT : 0;
}
-static int
-do_osf_statfs(struct path *path, struct osf_statfs __user *buffer,
- unsigned long bufsiz)
+SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
+ struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
struct kstatfs linux_stat;
- int error = vfs_statfs(path, &linux_stat);
+ int error = user_statfs(pathname, &linux_stat);
if (!error)
error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
return error;
}
-SYSCALL_DEFINE3(osf_statfs, const char __user *, pathname,
- struct osf_statfs __user *, buffer, unsigned long, bufsiz)
-{
- struct path path;
- int retval;
-
- retval = user_path(pathname, &path);
- if (!retval) {
- retval = do_osf_statfs(&path, buffer, bufsiz);
- path_put(&path);
- }
- return retval;
-}
-
SYSCALL_DEFINE3(osf_fstatfs, unsigned long, fd,
struct osf_statfs __user *, buffer, unsigned long, bufsiz)
{
- struct file *file;
- int retval;
-
- retval = -EBADF;
- file = fget(fd);
- if (file) {
- retval = do_osf_statfs(&file->f_path, buffer, bufsiz);
- fput(file);
- }
- return retval;
+ struct kstatfs linux_stat;
+ int error = fd_statfs(fd, &linux_stat);
+ if (!error)
+ error = linux_to_osf_statfs(&linux_stat, buffer, bufsiz);
+ return error;
}
/*
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index 7bef61768236..88d95e872f55 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -44,31 +44,31 @@ alcor_update_irq_hw(unsigned long mask)
}
static inline void
-alcor_enable_irq(unsigned int irq)
+alcor_enable_irq(struct irq_data *d)
{
- alcor_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16));
+ alcor_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
}
static void
-alcor_disable_irq(unsigned int irq)
+alcor_disable_irq(struct irq_data *d)
{
- alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16)));
+ alcor_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
}
static void
-alcor_mask_and_ack_irq(unsigned int irq)
+alcor_mask_and_ack_irq(struct irq_data *d)
{
- alcor_disable_irq(irq);
+ alcor_disable_irq(d);
/* On ALCOR/XLT, need to dismiss interrupt via GRU. */
- *(vuip)GRU_INT_CLEAR = 1 << (irq - 16); mb();
+ *(vuip)GRU_INT_CLEAR = 1 << (d->irq - 16); mb();
*(vuip)GRU_INT_CLEAR = 0; mb();
}
static void
-alcor_isa_mask_and_ack_irq(unsigned int irq)
+alcor_isa_mask_and_ack_irq(struct irq_data *d)
{
- i8259a_mask_and_ack_irq(irq);
+ i8259a_mask_and_ack_irq(d);
/* On ALCOR/XLT, need to dismiss interrupt via GRU. */
*(vuip)GRU_INT_CLEAR = 0x80000000; mb();
@@ -77,9 +77,9 @@ alcor_isa_mask_and_ack_irq(unsigned int irq)
static struct irq_chip alcor_irq_type = {
.name = "ALCOR",
- .unmask = alcor_enable_irq,
- .mask = alcor_disable_irq,
- .mask_ack = alcor_mask_and_ack_irq,
+ .irq_unmask = alcor_enable_irq,
+ .irq_mask = alcor_disable_irq,
+ .irq_mask_ack = alcor_mask_and_ack_irq,
};
static void
@@ -126,9 +126,9 @@ alcor_init_irq(void)
if (i >= 16+20 && i <= 16+30)
continue;
set_irq_chip_and_handler(i, &alcor_irq_type, handle_level_irq);
- irq_to_desc(i)->status |= IRQ_LEVEL;
+ irq_set_status_flags(i, IRQ_LEVEL);
}
- i8259a_irq_type.ack = alcor_isa_mask_and_ack_irq;
+ i8259a_irq_type.irq_ack = alcor_isa_mask_and_ack_irq;
init_i8259a_irqs();
common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c
index b0c916493aea..57eb6307bc27 100644
--- a/arch/alpha/kernel/sys_cabriolet.c
+++ b/arch/alpha/kernel/sys_cabriolet.c
@@ -46,22 +46,22 @@ cabriolet_update_irq_hw(unsigned int irq, unsigned long mask)
}
static inline void
-cabriolet_enable_irq(unsigned int irq)
+cabriolet_enable_irq(struct irq_data *d)
{
- cabriolet_update_irq_hw(irq, cached_irq_mask &= ~(1UL << irq));
+ cabriolet_update_irq_hw(d->irq, cached_irq_mask &= ~(1UL << d->irq));
}
static void
-cabriolet_disable_irq(unsigned int irq)
+cabriolet_disable_irq(struct irq_data *d)
{
- cabriolet_update_irq_hw(irq, cached_irq_mask |= 1UL << irq);
+ cabriolet_update_irq_hw(d->irq, cached_irq_mask |= 1UL << d->irq);
}
static struct irq_chip cabriolet_irq_type = {
.name = "CABRIOLET",
- .unmask = cabriolet_enable_irq,
- .mask = cabriolet_disable_irq,
- .mask_ack = cabriolet_disable_irq,
+ .irq_unmask = cabriolet_enable_irq,
+ .irq_mask = cabriolet_disable_irq,
+ .irq_mask_ack = cabriolet_disable_irq,
};
static void
@@ -107,7 +107,7 @@ common_init_irq(void (*srm_dev_int)(unsigned long v))
for (i = 16; i < 35; ++i) {
set_irq_chip_and_handler(i, &cabriolet_irq_type,
handle_level_irq);
- irq_to_desc(i)->status |= IRQ_LEVEL;
+ irq_set_status_flags(i, IRQ_LEVEL);
}
}
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index edad5f759ccd..481df4ecb651 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -98,37 +98,37 @@ tsunami_update_irq_hw(unsigned long mask)
}
static void
-dp264_enable_irq(unsigned int irq)
+dp264_enable_irq(struct irq_data *d)
{
spin_lock(&dp264_irq_lock);
- cached_irq_mask |= 1UL << irq;
+ cached_irq_mask |= 1UL << d->irq;
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
static void
-dp264_disable_irq(unsigned int irq)
+dp264_disable_irq(struct irq_data *d)
{
spin_lock(&dp264_irq_lock);
- cached_irq_mask &= ~(1UL << irq);
+ cached_irq_mask &= ~(1UL << d->irq);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
static void
-clipper_enable_irq(unsigned int irq)
+clipper_enable_irq(struct irq_data *d)
{
spin_lock(&dp264_irq_lock);
- cached_irq_mask |= 1UL << (irq - 16);
+ cached_irq_mask |= 1UL << (d->irq - 16);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
static void
-clipper_disable_irq(unsigned int irq)
+clipper_disable_irq(struct irq_data *d)
{
spin_lock(&dp264_irq_lock);
- cached_irq_mask &= ~(1UL << (irq - 16));
+ cached_irq_mask &= ~(1UL << (d->irq - 16));
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
}
@@ -149,10 +149,11 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
static int
-dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
-{
+dp264_set_affinity(struct irq_data *d, const struct cpumask *affinity,
+ bool force)
+{
spin_lock(&dp264_irq_lock);
- cpu_set_irq_affinity(irq, *affinity);
+ cpu_set_irq_affinity(d->irq, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
@@ -160,10 +161,11 @@ dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
}
static int
-clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
-{
+clipper_set_affinity(struct irq_data *d, const struct cpumask *affinity,
+ bool force)
+{
spin_lock(&dp264_irq_lock);
- cpu_set_irq_affinity(irq - 16, *affinity);
+ cpu_set_irq_affinity(d->irq - 16, *affinity);
tsunami_update_irq_hw(cached_irq_mask);
spin_unlock(&dp264_irq_lock);
@@ -171,19 +173,19 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
}
static struct irq_chip dp264_irq_type = {
- .name = "DP264",
- .unmask = dp264_enable_irq,
- .mask = dp264_disable_irq,
- .mask_ack = dp264_disable_irq,
- .set_affinity = dp264_set_affinity,
+ .name = "DP264",
+ .irq_unmask = dp264_enable_irq,
+ .irq_mask = dp264_disable_irq,
+ .irq_mask_ack = dp264_disable_irq,
+ .irq_set_affinity = dp264_set_affinity,
};
static struct irq_chip clipper_irq_type = {
- .name = "CLIPPER",
- .unmask = clipper_enable_irq,
- .mask = clipper_disable_irq,
- .mask_ack = clipper_disable_irq,
- .set_affinity = clipper_set_affinity,
+ .name = "CLIPPER",
+ .irq_unmask = clipper_enable_irq,
+ .irq_mask = clipper_disable_irq,
+ .irq_mask_ack = clipper_disable_irq,
+ .irq_set_affinity = clipper_set_affinity,
};
static void
@@ -268,8 +270,8 @@ init_tsunami_irqs(struct irq_chip * ops, int imin, int imax)
{
long i;
for (i = imin; i <= imax; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, ops, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
}
diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c
index ae5f29d127b0..402e908ffb3e 100644
--- a/arch/alpha/kernel/sys_eb64p.c
+++ b/arch/alpha/kernel/sys_eb64p.c
@@ -44,22 +44,22 @@ eb64p_update_irq_hw(unsigned int irq, unsigned long mask)
}
static inline void
-eb64p_enable_irq(unsigned int irq)
+eb64p_enable_irq(struct irq_data *d)
{
- eb64p_update_irq_hw(irq, cached_irq_mask &= ~(1 << irq));
+ eb64p_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << d->irq));
}
static void
-eb64p_disable_irq(unsigned int irq)
+eb64p_disable_irq(struct irq_data *d)
{
- eb64p_update_irq_hw(irq, cached_irq_mask |= 1 << irq);
+ eb64p_update_irq_hw(d->irq, cached_irq_mask |= 1 << d->irq);
}
static struct irq_chip eb64p_irq_type = {
.name = "EB64P",
- .unmask = eb64p_enable_irq,
- .mask = eb64p_disable_irq,
- .mask_ack = eb64p_disable_irq,
+ .irq_unmask = eb64p_enable_irq,
+ .irq_mask = eb64p_disable_irq,
+ .irq_mask_ack = eb64p_disable_irq,
};
static void
@@ -118,9 +118,9 @@ eb64p_init_irq(void)
init_i8259a_irqs();
for (i = 16; i < 32; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &eb64p_irq_type, handle_level_irq);
- }
+ irq_set_status_flags(i, IRQ_LEVEL);
+ }
common_init_isa_dma();
setup_irq(16+5, &isa_cascade_irqaction);
diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c
index 1121bc5c6c6c..0b44a54c1522 100644
--- a/arch/alpha/kernel/sys_eiger.c
+++ b/arch/alpha/kernel/sys_eiger.c
@@ -51,16 +51,18 @@ eiger_update_irq_hw(unsigned long irq, unsigned long mask)
}
static inline void
-eiger_enable_irq(unsigned int irq)
+eiger_enable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
unsigned long mask;
mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
eiger_update_irq_hw(irq, mask);
}
static void
-eiger_disable_irq(unsigned int irq)
+eiger_disable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
unsigned long mask;
mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
eiger_update_irq_hw(irq, mask);
@@ -68,9 +70,9 @@ eiger_disable_irq(unsigned int irq)
static struct irq_chip eiger_irq_type = {
.name = "EIGER",
- .unmask = eiger_enable_irq,
- .mask = eiger_disable_irq,
- .mask_ack = eiger_disable_irq,
+ .irq_unmask = eiger_enable_irq,
+ .irq_mask = eiger_disable_irq,
+ .irq_mask_ack = eiger_disable_irq,
};
static void
@@ -136,8 +138,8 @@ eiger_init_irq(void)
init_i8259a_irqs();
for (i = 16; i < 128; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &eiger_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
}
diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c
index 34f55e03d331..00341b75c8b2 100644
--- a/arch/alpha/kernel/sys_jensen.c
+++ b/arch/alpha/kernel/sys_jensen.c
@@ -63,34 +63,34 @@
*/
static void
-jensen_local_enable(unsigned int irq)
+jensen_local_enable(struct irq_data *d)
{
/* the parport is really hw IRQ 1, silly Jensen. */
- if (irq == 7)
- i8259a_enable_irq(1);
+ if (d->irq == 7)
+ i8259a_enable_irq(irq_get_irq_data(1));
}
static void
-jensen_local_disable(unsigned int irq)
+jensen_local_disable(struct irq_data *d)
{
/* the parport is really hw IRQ 1, silly Jensen. */
- if (irq == 7)
- i8259a_disable_irq(1);
+ if (d->irq == 7)
+ i8259a_disable_irq(irq_get_irq_data(1));
}
static void
-jensen_local_mask_ack(unsigned int irq)
+jensen_local_mask_ack(struct irq_data *d)
{
/* the parport is really hw IRQ 1, silly Jensen. */
- if (irq == 7)
- i8259a_mask_and_ack_irq(1);
+ if (d->irq == 7)
+ i8259a_mask_and_ack_irq(irq_get_irq_data(1));
}
static struct irq_chip jensen_local_irq_type = {
.name = "LOCAL",
- .unmask = jensen_local_enable,
- .mask = jensen_local_disable,
- .mask_ack = jensen_local_mask_ack,
+ .irq_unmask = jensen_local_enable,
+ .irq_mask = jensen_local_disable,
+ .irq_mask_ack = jensen_local_mask_ack,
};
static void
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 2bfc9f1b1ddc..e61910734e41 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -104,9 +104,10 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
}
static void
-io7_enable_irq(unsigned int irq)
+io7_enable_irq(struct irq_data *d)
{
volatile unsigned long *ctl;
+ unsigned int irq = d->irq;
struct io7 *io7;
ctl = io7_get_irq_ctl(irq, &io7);
@@ -115,7 +116,7 @@ io7_enable_irq(unsigned int irq)
__func__, irq);
return;
}
-
+
spin_lock(&io7->irq_lock);
*ctl |= 1UL << 24;
mb();
@@ -124,9 +125,10 @@ io7_enable_irq(unsigned int irq)
}
static void
-io7_disable_irq(unsigned int irq)
+io7_disable_irq(struct irq_data *d)
{
volatile unsigned long *ctl;
+ unsigned int irq = d->irq;
struct io7 *io7;
ctl = io7_get_irq_ctl(irq, &io7);
@@ -135,7 +137,7 @@ io7_disable_irq(unsigned int irq)
__func__, irq);
return;
}
-
+
spin_lock(&io7->irq_lock);
*ctl &= ~(1UL << 24);
mb();
@@ -144,35 +146,29 @@ io7_disable_irq(unsigned int irq)
}
static void
-marvel_irq_noop(unsigned int irq)
-{
- return;
-}
-
-static unsigned int
-marvel_irq_noop_return(unsigned int irq)
-{
- return 0;
+marvel_irq_noop(struct irq_data *d)
+{
+ return;
}
static struct irq_chip marvel_legacy_irq_type = {
.name = "LEGACY",
- .mask = marvel_irq_noop,
- .unmask = marvel_irq_noop,
+ .irq_mask = marvel_irq_noop,
+ .irq_unmask = marvel_irq_noop,
};
static struct irq_chip io7_lsi_irq_type = {
.name = "LSI",
- .unmask = io7_enable_irq,
- .mask = io7_disable_irq,
- .mask_ack = io7_disable_irq,
+ .irq_unmask = io7_enable_irq,
+ .irq_mask = io7_disable_irq,
+ .irq_mask_ack = io7_disable_irq,
};
static struct irq_chip io7_msi_irq_type = {
.name = "MSI",
- .unmask = io7_enable_irq,
- .mask = io7_disable_irq,
- .ack = marvel_irq_noop,
+ .irq_unmask = io7_enable_irq,
+ .irq_mask = io7_disable_irq,
+ .irq_ack = marvel_irq_noop,
};
static void
@@ -280,8 +276,8 @@ init_io7_irqs(struct io7 *io7,
/* Set up the lsi irqs. */
for (i = 0; i < 128; ++i) {
- irq_to_desc(base + i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(base + i, lsi_ops, handle_level_irq);
+ irq_set_status_flags(base + i, IRQ_LEVEL);
}
/* Disable the implemented irqs in hardware. */
@@ -294,8 +290,8 @@ init_io7_irqs(struct io7 *io7,
/* Set up the msi irqs. */
for (i = 128; i < (128 + 512); ++i) {
- irq_to_desc(base + i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(base + i, msi_ops, handle_level_irq);
+ irq_set_status_flags(base + i, IRQ_LEVEL);
}
for (i = 0; i < 16; ++i)
diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c
index bcc1639e8efb..cf7f43dd3147 100644
--- a/arch/alpha/kernel/sys_mikasa.c
+++ b/arch/alpha/kernel/sys_mikasa.c
@@ -43,22 +43,22 @@ mikasa_update_irq_hw(int mask)
}
static inline void
-mikasa_enable_irq(unsigned int irq)
+mikasa_enable_irq(struct irq_data *d)
{
- mikasa_update_irq_hw(cached_irq_mask |= 1 << (irq - 16));
+ mikasa_update_irq_hw(cached_irq_mask |= 1 << (d->irq - 16));
}
static void
-mikasa_disable_irq(unsigned int irq)
+mikasa_disable_irq(struct irq_data *d)
{
- mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (irq - 16)));
+ mikasa_update_irq_hw(cached_irq_mask &= ~(1 << (d->irq - 16)));
}
static struct irq_chip mikasa_irq_type = {
.name = "MIKASA",
- .unmask = mikasa_enable_irq,
- .mask = mikasa_disable_irq,
- .mask_ack = mikasa_disable_irq,
+ .irq_unmask = mikasa_enable_irq,
+ .irq_mask = mikasa_disable_irq,
+ .irq_mask_ack = mikasa_disable_irq,
};
static void
@@ -98,8 +98,8 @@ mikasa_init_irq(void)
mikasa_update_irq_hw(0);
for (i = 16; i < 32; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &mikasa_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c
index e88f4ae1260e..92bc188e94a9 100644
--- a/arch/alpha/kernel/sys_noritake.c
+++ b/arch/alpha/kernel/sys_noritake.c
@@ -48,22 +48,22 @@ noritake_update_irq_hw(int irq, int mask)
}
static void
-noritake_enable_irq(unsigned int irq)
+noritake_enable_irq(struct irq_data *d)
{
- noritake_update_irq_hw(irq, cached_irq_mask |= 1 << (irq - 16));
+ noritake_update_irq_hw(d->irq, cached_irq_mask |= 1 << (d->irq - 16));
}
static void
-noritake_disable_irq(unsigned int irq)
+noritake_disable_irq(struct irq_data *d)
{
- noritake_update_irq_hw(irq, cached_irq_mask &= ~(1 << (irq - 16)));
+ noritake_update_irq_hw(d->irq, cached_irq_mask &= ~(1 << (d->irq - 16)));
}
static struct irq_chip noritake_irq_type = {
.name = "NORITAKE",
- .unmask = noritake_enable_irq,
- .mask = noritake_disable_irq,
- .mask_ack = noritake_disable_irq,
+ .irq_unmask = noritake_enable_irq,
+ .irq_mask = noritake_disable_irq,
+ .irq_mask_ack = noritake_disable_irq,
};
static void
@@ -127,8 +127,8 @@ noritake_init_irq(void)
outw(0, 0x54c);
for (i = 16; i < 48; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &noritake_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c
index 6a51364dd1cc..936d4140ed5f 100644
--- a/arch/alpha/kernel/sys_rawhide.c
+++ b/arch/alpha/kernel/sys_rawhide.c
@@ -56,9 +56,10 @@ rawhide_update_irq_hw(int hose, int mask)
(((h) < MCPCIA_MAX_HOSES) && (cached_irq_masks[(h)] != 0))
static inline void
-rawhide_enable_irq(unsigned int irq)
+rawhide_enable_irq(struct irq_data *d)
{
unsigned int mask, hose;
+ unsigned int irq = d->irq;
irq -= 16;
hose = irq / 24;
@@ -76,9 +77,10 @@ rawhide_enable_irq(unsigned int irq)
}
static void
-rawhide_disable_irq(unsigned int irq)
+rawhide_disable_irq(struct irq_data *d)
{
unsigned int mask, hose;
+ unsigned int irq = d->irq;
irq -= 16;
hose = irq / 24;
@@ -96,9 +98,10 @@ rawhide_disable_irq(unsigned int irq)
}
static void
-rawhide_mask_and_ack_irq(unsigned int irq)
+rawhide_mask_and_ack_irq(struct irq_data *d)
{
unsigned int mask, mask1, hose;
+ unsigned int irq = d->irq;
irq -= 16;
hose = irq / 24;
@@ -123,9 +126,9 @@ rawhide_mask_and_ack_irq(unsigned int irq)
static struct irq_chip rawhide_irq_type = {
.name = "RAWHIDE",
- .unmask = rawhide_enable_irq,
- .mask = rawhide_disable_irq,
- .mask_ack = rawhide_mask_and_ack_irq,
+ .irq_unmask = rawhide_enable_irq,
+ .irq_mask = rawhide_disable_irq,
+ .irq_mask_ack = rawhide_mask_and_ack_irq,
};
static void
@@ -177,8 +180,8 @@ rawhide_init_irq(void)
}
for (i = 16; i < 128; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &rawhide_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c
index 89e7e37ec84c..cea22a62913b 100644
--- a/arch/alpha/kernel/sys_rx164.c
+++ b/arch/alpha/kernel/sys_rx164.c
@@ -47,22 +47,22 @@ rx164_update_irq_hw(unsigned long mask)
}
static inline void
-rx164_enable_irq(unsigned int irq)
+rx164_enable_irq(struct irq_data *d)
{
- rx164_update_irq_hw(cached_irq_mask |= 1UL << (irq - 16));
+ rx164_update_irq_hw(cached_irq_mask |= 1UL << (d->irq - 16));
}
static void
-rx164_disable_irq(unsigned int irq)
+rx164_disable_irq(struct irq_data *d)
{
- rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (irq - 16)));
+ rx164_update_irq_hw(cached_irq_mask &= ~(1UL << (d->irq - 16)));
}
static struct irq_chip rx164_irq_type = {
.name = "RX164",
- .unmask = rx164_enable_irq,
- .mask = rx164_disable_irq,
- .mask_ack = rx164_disable_irq,
+ .irq_unmask = rx164_enable_irq,
+ .irq_mask = rx164_disable_irq,
+ .irq_mask_ack = rx164_disable_irq,
};
static void
@@ -99,8 +99,8 @@ rx164_init_irq(void)
rx164_update_irq_hw(0);
for (i = 16; i < 40; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &rx164_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
init_i8259a_irqs();
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 5c4423d1b06c..a349538aabc9 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -443,11 +443,11 @@ lynx_swizzle(struct pci_dev *dev, u8 *pinp)
/* GENERIC irq routines */
static inline void
-sable_lynx_enable_irq(unsigned int irq)
+sable_lynx_enable_irq(struct irq_data *d)
{
unsigned long bit, mask;
- bit = sable_lynx_irq_swizzle->irq_to_mask[irq];
+ bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
spin_lock(&sable_lynx_irq_lock);
mask = sable_lynx_irq_swizzle->shadow_mask &= ~(1UL << bit);
sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -459,11 +459,11 @@ sable_lynx_enable_irq(unsigned int irq)
}
static void
-sable_lynx_disable_irq(unsigned int irq)
+sable_lynx_disable_irq(struct irq_data *d)
{
unsigned long bit, mask;
- bit = sable_lynx_irq_swizzle->irq_to_mask[irq];
+ bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
spin_lock(&sable_lynx_irq_lock);
mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -475,11 +475,11 @@ sable_lynx_disable_irq(unsigned int irq)
}
static void
-sable_lynx_mask_and_ack_irq(unsigned int irq)
+sable_lynx_mask_and_ack_irq(struct irq_data *d)
{
unsigned long bit, mask;
- bit = sable_lynx_irq_swizzle->irq_to_mask[irq];
+ bit = sable_lynx_irq_swizzle->irq_to_mask[d->irq];
spin_lock(&sable_lynx_irq_lock);
mask = sable_lynx_irq_swizzle->shadow_mask |= 1UL << bit;
sable_lynx_irq_swizzle->update_irq_hw(bit, mask);
@@ -489,9 +489,9 @@ sable_lynx_mask_and_ack_irq(unsigned int irq)
static struct irq_chip sable_lynx_irq_type = {
.name = "SABLE/LYNX",
- .unmask = sable_lynx_enable_irq,
- .mask = sable_lynx_disable_irq,
- .mask_ack = sable_lynx_mask_and_ack_irq,
+ .irq_unmask = sable_lynx_enable_irq,
+ .irq_mask = sable_lynx_disable_irq,
+ .irq_mask_ack = sable_lynx_mask_and_ack_irq,
};
static void
@@ -518,9 +518,9 @@ sable_lynx_init_irq(int nr_of_irqs)
long i;
for (i = 0; i < nr_of_irqs; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &sable_lynx_irq_type,
handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c
index f8a1e8a862fb..42a5331f13c4 100644
--- a/arch/alpha/kernel/sys_takara.c
+++ b/arch/alpha/kernel/sys_takara.c
@@ -45,16 +45,18 @@ takara_update_irq_hw(unsigned long irq, unsigned long mask)
}
static inline void
-takara_enable_irq(unsigned int irq)
+takara_enable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
unsigned long mask;
mask = (cached_irq_mask[irq >= 64] &= ~(1UL << (irq & 63)));
takara_update_irq_hw(irq, mask);
}
static void
-takara_disable_irq(unsigned int irq)
+takara_disable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
unsigned long mask;
mask = (cached_irq_mask[irq >= 64] |= 1UL << (irq & 63));
takara_update_irq_hw(irq, mask);
@@ -62,9 +64,9 @@ takara_disable_irq(unsigned int irq)
static struct irq_chip takara_irq_type = {
.name = "TAKARA",
- .unmask = takara_enable_irq,
- .mask = takara_disable_irq,
- .mask_ack = takara_disable_irq,
+ .irq_unmask = takara_enable_irq,
+ .irq_mask = takara_disable_irq,
+ .irq_mask_ack = takara_disable_irq,
};
static void
@@ -136,8 +138,8 @@ takara_init_irq(void)
takara_update_irq_hw(i, -1);
for (i = 16; i < 128; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, &takara_irq_type, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
common_init_isa_dma();
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index e02494bf5ef3..8c13a0c77830 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -112,8 +112,9 @@ titan_update_irq_hw(unsigned long mask)
}
static inline void
-titan_enable_irq(unsigned int irq)
+titan_enable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
spin_lock(&titan_irq_lock);
titan_cached_irq_mask |= 1UL << (irq - 16);
titan_update_irq_hw(titan_cached_irq_mask);
@@ -121,8 +122,9 @@ titan_enable_irq(unsigned int irq)
}
static inline void
-titan_disable_irq(unsigned int irq)
+titan_disable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
spin_lock(&titan_irq_lock);
titan_cached_irq_mask &= ~(1UL << (irq - 16));
titan_update_irq_hw(titan_cached_irq_mask);
@@ -144,8 +146,10 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
}
static int
-titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+titan_set_irq_affinity(struct irq_data *d, const struct cpumask *affinity,
+ bool force)
{
+ unsigned int irq = d->irq;
spin_lock(&titan_irq_lock);
titan_cpu_set_irq_affinity(irq - 16, *affinity);
titan_update_irq_hw(titan_cached_irq_mask);
@@ -175,17 +179,17 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax)
{
long i;
for (i = imin; i <= imax; ++i) {
- irq_to_desc(i)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i, ops, handle_level_irq);
+ irq_set_status_flags(i, IRQ_LEVEL);
}
}
static struct irq_chip titan_irq_type = {
- .name = "TITAN",
- .unmask = titan_enable_irq,
- .mask = titan_disable_irq,
- .mask_ack = titan_disable_irq,
- .set_affinity = titan_set_irq_affinity,
+ .name = "TITAN",
+ .irq_unmask = titan_enable_irq,
+ .irq_mask = titan_disable_irq,
+ .irq_mask_ack = titan_disable_irq,
+ .irq_set_affinity = titan_set_irq_affinity,
};
static irqreturn_t
diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c
index eec52594d410..ca60a387ef0a 100644
--- a/arch/alpha/kernel/sys_wildfire.c
+++ b/arch/alpha/kernel/sys_wildfire.c
@@ -104,10 +104,12 @@ wildfire_init_irq_hw(void)
}
static void
-wildfire_enable_irq(unsigned int irq)
+wildfire_enable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
+
if (irq < 16)
- i8259a_enable_irq(irq);
+ i8259a_enable_irq(d);
spin_lock(&wildfire_irq_lock);
set_bit(irq, &cached_irq_mask);
@@ -116,10 +118,12 @@ wildfire_enable_irq(unsigned int irq)
}
static void
-wildfire_disable_irq(unsigned int irq)
+wildfire_disable_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
+
if (irq < 16)
- i8259a_disable_irq(irq);
+ i8259a_disable_irq(d);
spin_lock(&wildfire_irq_lock);
clear_bit(irq, &cached_irq_mask);
@@ -128,10 +132,12 @@ wildfire_disable_irq(unsigned int irq)
}
static void
-wildfire_mask_and_ack_irq(unsigned int irq)
+wildfire_mask_and_ack_irq(struct irq_data *d)
{
+ unsigned int irq = d->irq;
+
if (irq < 16)
- i8259a_mask_and_ack_irq(irq);
+ i8259a_mask_and_ack_irq(d);
spin_lock(&wildfire_irq_lock);
clear_bit(irq, &cached_irq_mask);
@@ -141,9 +147,9 @@ wildfire_mask_and_ack_irq(unsigned int irq)
static struct irq_chip wildfire_irq_type = {
.name = "WILDFIRE",
- .unmask = wildfire_enable_irq,
- .mask = wildfire_disable_irq,
- .mask_ack = wildfire_mask_and_ack_irq,
+ .irq_unmask = wildfire_enable_irq,
+ .irq_mask = wildfire_disable_irq,
+ .irq_mask_ack = wildfire_mask_and_ack_irq,
};
static void __init
@@ -177,21 +183,21 @@ wildfire_init_irq_per_pca(int qbbno, int pcano)
for (i = 0; i < 16; ++i) {
if (i == 2)
continue;
- irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
handle_level_irq);
+ irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
}
- irq_to_desc(36+irq_bias)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(36+irq_bias, &wildfire_irq_type,
handle_level_irq);
+ irq_set_status_flags(36 + irq_bias, IRQ_LEVEL);
for (i = 40; i < 64; ++i) {
- irq_to_desc(i+irq_bias)->status |= IRQ_LEVEL;
set_irq_chip_and_handler(i+irq_bias, &wildfire_irq_type,
handle_level_irq);
+ irq_set_status_flags(i + irq_bias, IRQ_LEVEL);
}
- setup_irq(32+irq_bias, &isa_enable);
+ setup_irq(32+irq_bias, &isa_enable);
}
static void __init
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..a58e84f1a63b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t timer_interrupt(int irq, void *dev)
{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
profile_tick(CPU_PROFILING);
#endif
- write_seqlock(&xtime_lock);
-
/*
* Calculate how many ticks have passed since the last update,
* including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
nticks = delta >> FIX_SHIFT;
if (nticks)
- do_timer(nticks);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(nticks);
if (test_irq_work_pending()) {
clear_irq_work_pending();
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 003ef4c02585..433be2a24f31 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,5 +1,6 @@
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
+#include <asm/cache.h>
#include <asm/page.h>
OUTPUT_FORMAT("elf64-alpha")
@@ -38,7 +39,7 @@ SECTIONS
__init_begin = ALIGN(PAGE_SIZE);
INIT_TEXT_SECTION(PAGE_SIZE)
INIT_DATA_SECTION(16)
- PERCPU(PAGE_SIZE)
+ PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
/* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
needed for the THREAD_SIZE aligned init_task gets freed after init */
. = ALIGN(THREAD_SIZE);
@@ -46,7 +47,7 @@ SECTIONS
/* Freed after init ends here */
_data = .;
- RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.got : {
*(.got)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 26d45e5b636b..166efa2a19cd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1177,6 +1177,31 @@ config ARM_ERRATA_743622
visible impact on the overall performance or power consumption of the
processor.
+config ARM_ERRATA_751472
+ bool "ARM errata: Interrupted ICIALLUIS may prevent completion of broadcasted operation"
+ depends on CPU_V7 && SMP
+ help
+ This option enables the workaround for the 751472 Cortex-A9 (prior
+ to r3p0) erratum. An interrupted ICIALLUIS operation may prevent the
+ completion of a following broadcasted operation if the second
+ operation is received by a CPU before the ICIALLUIS has completed,
+ potentially leading to corrupted entries in the cache or TLB.
+
+config ARM_ERRATA_753970
+ bool "ARM errata: cache sync operation may be faulty"
+ depends on CACHE_PL310
+ help
+ This option enables the workaround for the 753970 PL310 (r3p0) erratum.
+
+ Under some condition the effect of cache sync operation on
+ the store buffer still remains when the operation completes.
+ This means that the store buffer is always asked to drain and
+ this prevents it from merging any further writes. The workaround
+ is to replace the normal offset of cache sync operation (0x730)
+ by another offset targeting an unmapped PL310 register 0x740.
+ This has the same effect as the cache sync operation: store buffer
+ drain and waiting for all buffers empty.
+
endmenu
source "arch/arm/common/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c22c1adfedd6..6f7b29294c80 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -15,7 +15,7 @@ ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
LDFLAGS_vmlinux += --be8
endif
-OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+OBJCOPYFLAGS :=-O binary -R .comment -S
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb:
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index ab204db594d3..c6028967d336 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -1,3 +1,7 @@
font.c
-piggy.gz
+lib1funcs.S
+piggy.gzip
+piggy.lzo
+piggy.lzma
+vmlinux
vmlinux.lds
diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index 778655f0257a..ea5ee4d067f3 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -6,6 +6,8 @@ config ARM_VIC
config ARM_VIC_NR
int
+ default 4 if ARCH_S5PV210
+ default 3 if ARCH_S5P6442 || ARCH_S5PC100
default 2
depends on ARM_VIC
help
diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h
index b33fe7065b38..199a6b6de7f4 100644
--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -35,7 +35,7 @@
: "cc", "memory")
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -46,7 +46,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable(); /* implies preempt_disable() */
@@ -88,36 +88,35 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int val;
+ int ret = 0;
+ u32 val;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable(); /* implies preempt_disable() */
-
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
- "1: " T(ldr) " %0, [%3]\n"
- " teq %0, %1\n"
+ "1: " T(ldr) " %1, [%4]\n"
+ " teq %1, %2\n"
" it eq @ explicit IT needed for the 2b label\n"
- "2: " T(streq) " %2, [%3]\n"
+ "2: " T(streq) " %3, [%4]\n"
"3:\n"
" .pushsection __ex_table,\"a\"\n"
" .align 3\n"
" .long 1b, 4f, 2b, 4f\n"
" .popsection\n"
" .pushsection .fixup,\"ax\"\n"
- "4: mov %0, %4\n"
+ "4: mov %0, %5\n"
" b 3b\n"
" .popsection"
- : "=&r" (val)
+ : "+r" (ret), "=&r" (val)
: "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT)
: "cc", "memory");
- pagefault_enable(); /* subsumes preempt_enable() */
-
- return val;
+ *uval = val;
+ return ret;
}
#endif /* !SMP */
diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h
index 5aeec1e1735c..16bd48031583 100644
--- a/arch/arm/include/asm/hardware/cache-l2x0.h
+++ b/arch/arm/include/asm/hardware/cache-l2x0.h
@@ -36,6 +36,7 @@
#define L2X0_RAW_INTR_STAT 0x21C
#define L2X0_INTR_CLEAR 0x220
#define L2X0_CACHE_SYNC 0x730
+#define L2X0_DUMMY_REG 0x740
#define L2X0_INV_LINE_PA 0x770
#define L2X0_INV_WAY 0x77C
#define L2X0_CLEAN_LINE_PA 0x7B0
diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h
index 721847dc68ab..e0d1c0cfa548 100644
--- a/arch/arm/include/asm/hardware/sp810.h
+++ b/arch/arm/include/asm/hardware/sp810.h
@@ -58,6 +58,9 @@
static inline void sysctl_soft_reset(void __iomem *base)
{
+ /* switch to slow mode */
+ writel(0x2, base + SCCTRL);
+
/* writing any value to SCSYSSTAT reg will reset system */
writel(0, base + SCSYSSTAT);
}
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 3a0893a76a3b..bf13b814c1b8 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -15,10 +15,6 @@ struct meminfo;
struct sys_timer;
struct machine_desc {
- /*
- * Note! The first two elements are used
- * by assembler code in head.S, head-common.S
- */
unsigned int nr; /* architecture number */
const char *name; /* architecture name */
unsigned long boot_params; /* tagged list */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 9763be04f77e..22de005f159c 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -10,6 +10,8 @@
#ifndef _ASMARM_PGALLOC_H
#define _ASMARM_PGALLOC_H
+#include <linux/pagemap.h>
+
#include <asm/domain.h>
#include <asm/pgtable-hwdef.h>
#include <asm/processor.h>
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index f41a6f57cd12..82dfe5d0c41e 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -18,16 +18,34 @@
#define __ASMARM_TLB_H
#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
#ifndef CONFIG_MMU
#include <linux/pagemap.h>
+
+#define tlb_flush(tlb) ((void) tlb)
+
#include <asm-generic/tlb.h>
#else /* !CONFIG_MMU */
+#include <linux/swap.h>
#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+/*
+ * We need to delay page freeing for SMP as other CPUs can access pages
+ * which have been removed but not yet had their TLB entries invalidated.
+ * Also, as ARMv7 speculative prefetch can drag new entries into the TLB,
+ * we need to apply this same delaying tactic to ensure correct operation.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7)
+#define tlb_fast_mode(tlb) 0
+#define FREE_PTE_NR 500
+#else
+#define tlb_fast_mode(tlb) 1
+#define FREE_PTE_NR 0
+#endif
/*
* TLB handling. This allows us to remove pages from the page
@@ -36,12 +54,58 @@
struct mmu_gather {
struct mm_struct *mm;
unsigned int fullmm;
+ struct vm_area_struct *vma;
unsigned long range_start;
unsigned long range_end;
+ unsigned int nr;
+ struct page *pages[FREE_PTE_NR];
};
DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
+/*
+ * This is unnecessarily complex. There's three ways the TLB shootdown
+ * code is used:
+ * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
+ * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
+ * tlb->vma will be non-NULL.
+ * 2. Unmapping all vmas. See exit_mmap().
+ * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
+ * tlb->vma will be non-NULL. Additionally, page tables will be freed.
+ * 3. Unmapping argument pages. See shift_arg_pages().
+ * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
+ * tlb->vma will be NULL.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ if (tlb->fullmm || !tlb->vma)
+ flush_tlb_mm(tlb->mm);
+ else if (tlb->range_end > 0) {
+ flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
+ tlb->range_start = TASK_SIZE;
+ tlb->range_end = 0;
+ }
+}
+
+static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
+{
+ if (!tlb->fullmm) {
+ if (addr < tlb->range_start)
+ tlb->range_start = addr;
+ if (addr + PAGE_SIZE > tlb->range_end)
+ tlb->range_end = addr + PAGE_SIZE;
+ }
+}
+
+static inline void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+ tlb_flush(tlb);
+ if (!tlb_fast_mode(tlb)) {
+ free_pages_and_swap_cache(tlb->pages, tlb->nr);
+ tlb->nr = 0;
+ }
+}
+
static inline struct mmu_gather *
tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
{
@@ -49,6 +113,8 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
tlb->mm = mm;
tlb->fullmm = full_mm_flush;
+ tlb->vma = NULL;
+ tlb->nr = 0;
return tlb;
}
@@ -56,8 +122,7 @@ tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush)
static inline void
tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
{
- if (tlb->fullmm)
- flush_tlb_mm(tlb->mm);
+ tlb_flush_mmu(tlb);
/* keep the page table cache within bounds */
check_pgt_cache();
@@ -71,12 +136,7 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
static inline void
tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
{
- if (!tlb->fullmm) {
- if (addr < tlb->range_start)
- tlb->range_start = addr;
- if (addr + PAGE_SIZE > tlb->range_end)
- tlb->range_end = addr + PAGE_SIZE;
- }
+ tlb_add_flush(tlb, addr);
}
/*
@@ -89,6 +149,7 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (!tlb->fullmm) {
flush_cache_range(vma, vma->vm_start, vma->vm_end);
+ tlb->vma = vma;
tlb->range_start = TASK_SIZE;
tlb->range_end = 0;
}
@@ -97,12 +158,30 @@ tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
static inline void
tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
- if (!tlb->fullmm && tlb->range_end > 0)
- flush_tlb_range(vma, tlb->range_start, tlb->range_end);
+ if (!tlb->fullmm)
+ tlb_flush(tlb);
+}
+
+static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+{
+ if (tlb_fast_mode(tlb)) {
+ free_page_and_swap_cache(page);
+ } else {
+ tlb->pages[tlb->nr++] = page;
+ if (tlb->nr >= FREE_PTE_NR)
+ tlb_flush_mmu(tlb);
+ }
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
+ unsigned long addr)
+{
+ pgtable_page_dtor(pte);
+ tlb_add_flush(tlb, addr);
+ tlb_remove_page(tlb, pte);
}
-#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)
-#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
+#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
#define tlb_migrate_finish(mm) do { } while (0)
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index ce7378ea15a2..d2005de383b8 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -10,12 +10,7 @@
#ifndef _ASMARM_TLBFLUSH_H
#define _ASMARM_TLBFLUSH_H
-
-#ifndef CONFIG_MMU
-
-#define tlb_flush(tlb) ((void) tlb)
-
-#else /* CONFIG_MMU */
+#ifdef CONFIG_MMU
#include <asm/glue.h>
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index d600bd350704..44b84fe6e1b0 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -836,9 +836,11 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
/*
* One-time initialisation.
*/
-static void reset_ctrl_regs(void *unused)
+static void reset_ctrl_regs(void *info)
{
- int i;
+ int i, cpu = smp_processor_id();
+ u32 dbg_power;
+ cpumask_t *cpumask = info;
/*
* v7 debug contains save and restore registers so that debug state
@@ -850,6 +852,17 @@ static void reset_ctrl_regs(void *unused)
*/
if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
/*
+ * Ensure sticky power-down is clear (i.e. debug logic is
+ * powered up).
+ */
+ asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
+ if ((dbg_power & 0x1) == 0) {
+ pr_warning("CPU %d debug is powered down!\n", cpu);
+ cpumask_or(cpumask, cpumask, cpumask_of(cpu));
+ return;
+ }
+
+ /*
* Unconditionally clear the lock by writing a value
* other than 0xC5ACCE55 to the access register.
*/
@@ -887,6 +900,7 @@ static struct notifier_block __cpuinitdata dbg_reset_nb = {
static int __init arch_hw_breakpoint_init(void)
{
u32 dscr;
+ cpumask_t cpumask = { CPU_BITS_NONE };
debug_arch = get_debug_arch();
@@ -911,7 +925,13 @@ static int __init arch_hw_breakpoint_init(void)
* Reset the breakpoint resources. We assume that a halting
* debugger will leave the world in a nice state for us.
*/
- on_each_cpu(reset_ctrl_regs, NULL, 1);
+ on_each_cpu(reset_ctrl_regs, &cpumask, 1);
+ if (!cpumask_empty(&cpumask)) {
+ core_num_brps = 0;
+ core_num_reserved_brps = 0;
+ core_num_wrps = 0;
+ return 0;
+ }
ARM_DBG_READ(c1, 0, dscr);
if (dscr & ARM_DSCR_HDBGEN) {
diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c
index 2c1f0050c9c4..8f6ed43861f1 100644
--- a/arch/arm/kernel/kprobes-decode.c
+++ b/arch/arm/kernel/kprobes-decode.c
@@ -1437,7 +1437,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return space_cccc_1100_010x(insn, asi);
- } else if ((insn & 0x0e000000) == 0x0c400000) {
+ } else if ((insn & 0x0e000000) == 0x0c000000) {
return space_cccc_110x(insn, asi);
diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c
index b8af96ea62e6..2c79eec19262 100644
--- a/arch/arm/kernel/pmu.c
+++ b/arch/arm/kernel/pmu.c
@@ -97,28 +97,34 @@ set_irq_affinity(int irq,
irq, cpu);
return err;
#else
- return 0;
+ return -EINVAL;
#endif
}
static int
init_cpu_pmu(void)
{
- int i, err = 0;
+ int i, irqs, err = 0;
struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU];
- if (!pdev) {
- err = -ENODEV;
- goto out;
- }
+ if (!pdev)
+ return -ENODEV;
+
+ irqs = pdev->num_resources;
+
+ /*
+ * If we have a single PMU interrupt that we can't shift, assume that
+ * we're running on a uniprocessor machine and continue.
+ */
+ if (irqs == 1 && !irq_can_set_affinity(platform_get_irq(pdev, 0)))
+ return 0;
- for (i = 0; i < pdev->num_resources; ++i) {
+ for (i = 0; i < irqs; ++i) {
err = set_irq_affinity(platform_get_irq(pdev, i), i);
if (err)
break;
}
-out:
return err;
}
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 19c6816db61e..b13e70f63d71 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -996,10 +996,10 @@ static int ptrace_gethbpregs(struct task_struct *tsk, long num,
while (!(arch_ctrl.len & 0x1))
arch_ctrl.len >>= 1;
- if (idx & 0x1)
- reg = encode_ctrl_reg(arch_ctrl);
- else
+ if (num & 0x1)
reg = bp->attr.bp_addr;
+ else
+ reg = encode_ctrl_reg(arch_ctrl);
}
put:
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 420b8d6485d6..5ea4fb718b97 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -226,8 +226,8 @@ int cpu_architecture(void)
* Register 0 and check for VMSAv7 or PMSAv7 */
asm("mrc p15, 0, %0, c0, c1, 4"
: "=r" (mmfr0));
- if ((mmfr0 & 0x0000000f) == 0x00000003 ||
- (mmfr0 & 0x000000f0) == 0x00000030)
+ if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
+ (mmfr0 & 0x000000f0) >= 0x00000030)
cpu_arch = CPU_ARCH_ARMv7;
else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
(mmfr0 & 0x000000f0) == 0x00000020)
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 907d5a620bca..abaf8445ce25 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -474,7 +474,9 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka,
unsigned long handler = (unsigned long)ka->sa.sa_handler;
unsigned long retcode;
int thumb = 0;
- unsigned long cpsr = regs->ARM_cpsr & ~PSR_f;
+ unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT);
+
+ cpsr |= PSR_ENDSTATE;
/*
* Maybe we need to deliver a 32-bit signal to a 26-bit task.
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf233734..1ff46cabc7ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
{
profile_tick(CPU_PROFILING);
do_leds();
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 86b66f3f2031..28fea9b2d129 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -21,6 +21,12 @@
#define ARM_CPU_KEEP(x)
#endif
+#if defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)
+#define ARM_EXIT_KEEP(x) x
+#else
+#define ARM_EXIT_KEEP(x)
+#endif
+
OUTPUT_ARCH(arm)
ENTRY(stext)
@@ -43,6 +49,7 @@ SECTIONS
_sinittext = .;
HEAD_TEXT
INIT_TEXT
+ ARM_EXIT_KEEP(EXIT_TEXT)
_einittext = .;
ARM_CPU_DISCARD(PROC_INFO)
__arch_info_begin = .;
@@ -67,10 +74,11 @@ SECTIONS
#ifndef CONFIG_XIP_KERNEL
__init_begin = _stext;
INIT_DATA
+ ARM_EXIT_KEEP(EXIT_DATA)
#endif
}
- PERCPU(PAGE_SIZE)
+ PERCPU(32, PAGE_SIZE)
#ifndef CONFIG_XIP_KERNEL
. = ALIGN(PAGE_SIZE);
@@ -162,6 +170,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__init_begin = .;
INIT_DATA
+ ARM_EXIT_KEEP(EXIT_DATA)
. = ALIGN(PAGE_SIZE);
__init_end = .;
#endif
@@ -247,6 +256,8 @@ SECTIONS
}
#endif
+ NOTES
+
BSS_SECTION(0, 0, 0)
_end = .;
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f3..61fef9129c6a 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
{
struct pt_regs *regs = get_irq_regs();
do_leds();
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c
index 343de73161fa..4a68c2b1ec11 100644
--- a/arch/arm/mach-davinci/cpufreq.c
+++ b/arch/arm/mach-davinci/cpufreq.c
@@ -132,7 +132,7 @@ out:
return ret;
}
-static int __init davinci_cpu_init(struct cpufreq_policy *policy)
+static int davinci_cpu_init(struct cpufreq_policy *policy)
{
int result = 0;
struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data;
diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 9eec63070e0c..beda8a4133a0 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -480,8 +480,15 @@ static struct platform_device da850_mcasp_device = {
.resource = da850_mcasp_resources,
};
+struct platform_device davinci_pcm_device = {
+ .name = "davinci-pcm-audio",
+ .id = -1,
+};
+
void __init da8xx_register_mcasp(int id, struct snd_platform_data *pdata)
{
+ platform_device_register(&davinci_pcm_device);
+
/* DA830/OMAP-L137 has 3 instances of McASP */
if (cpu_is_davinci_da830() && id == 1) {
da830_mcasp1_device.dev.platform_data = pdata;
diff --git a/arch/arm/mach-davinci/gpio-tnetv107x.c b/arch/arm/mach-davinci/gpio-tnetv107x.c
index d10298620e2c..3fa3e2867e19 100644
--- a/arch/arm/mach-davinci/gpio-tnetv107x.c
+++ b/arch/arm/mach-davinci/gpio-tnetv107x.c
@@ -58,7 +58,7 @@ static int tnetv107x_gpio_request(struct gpio_chip *chip, unsigned offset)
spin_lock_irqsave(&ctlr->lock, flags);
- gpio_reg_set_bit(&regs->enable, gpio);
+ gpio_reg_set_bit(regs->enable, gpio);
spin_unlock_irqrestore(&ctlr->lock, flags);
@@ -74,7 +74,7 @@ static void tnetv107x_gpio_free(struct gpio_chip *chip, unsigned offset)
spin_lock_irqsave(&ctlr->lock, flags);
- gpio_reg_clear_bit(&regs->enable, gpio);
+ gpio_reg_clear_bit(regs->enable, gpio);
spin_unlock_irqrestore(&ctlr->lock, flags);
}
@@ -88,7 +88,7 @@ static int tnetv107x_gpio_dir_in(struct gpio_chip *chip, unsigned offset)
spin_lock_irqsave(&ctlr->lock, flags);
- gpio_reg_set_bit(&regs->direction, gpio);
+ gpio_reg_set_bit(regs->direction, gpio);
spin_unlock_irqrestore(&ctlr->lock, flags);
@@ -106,11 +106,11 @@ static int tnetv107x_gpio_dir_out(struct gpio_chip *chip,
spin_lock_irqsave(&ctlr->lock, flags);
if (value)
- gpio_reg_set_bit(&regs->data_out, gpio);
+ gpio_reg_set_bit(regs->data_out, gpio);
else
- gpio_reg_clear_bit(&regs->data_out, gpio);
+ gpio_reg_clear_bit(regs->data_out, gpio);
- gpio_reg_clear_bit(&regs->direction, gpio);
+ gpio_reg_clear_bit(regs->direction, gpio);
spin_unlock_irqrestore(&ctlr->lock, flags);
@@ -124,7 +124,7 @@ static int tnetv107x_gpio_get(struct gpio_chip *chip, unsigned offset)
unsigned gpio = chip->base + offset;
int ret;
- ret = gpio_reg_get_bit(&regs->data_in, gpio);
+ ret = gpio_reg_get_bit(regs->data_in, gpio);
return ret ? 1 : 0;
}
@@ -140,9 +140,9 @@ static void tnetv107x_gpio_set(struct gpio_chip *chip,
spin_lock_irqsave(&ctlr->lock, flags);
if (value)
- gpio_reg_set_bit(&regs->data_out, gpio);
+ gpio_reg_set_bit(regs->data_out, gpio);
else
- gpio_reg_clear_bit(&regs->data_out, gpio);
+ gpio_reg_clear_bit(regs->data_out, gpio);
spin_unlock_irqrestore(&ctlr->lock, flags);
}
diff --git a/arch/arm/mach-davinci/include/mach/clkdev.h b/arch/arm/mach-davinci/include/mach/clkdev.h
index 730c49d1ebd8..14a504887189 100644
--- a/arch/arm/mach-davinci/include/mach/clkdev.h
+++ b/arch/arm/mach-davinci/include/mach/clkdev.h
@@ -1,6 +1,8 @@
#ifndef __MACH_CLKDEV_H
#define __MACH_CLKDEV_H
+struct clk;
+
static inline int __clk_get(struct clk *clk)
{
return 1;
diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c
index 337392c3f549..acb7ae5b0a25 100644
--- a/arch/arm/mach-omap2/clkt_dpll.c
+++ b/arch/arm/mach-omap2/clkt_dpll.c
@@ -77,7 +77,7 @@ static int _dpll_test_fint(struct clk *clk, u8 n)
dd = clk->dpll_data;
/* DPLL divider must result in a valid jitter correction val */
- fint = clk->parent->rate / (n + 1);
+ fint = clk->parent->rate / n;
if (fint < DPLL_FINT_BAND1_MIN) {
pr_debug("rejecting n=%d due to Fint failure, "
diff --git a/arch/arm/mach-omap2/mailbox.c b/arch/arm/mach-omap2/mailbox.c
index 394413dc7deb..24b88504df0f 100644
--- a/arch/arm/mach-omap2/mailbox.c
+++ b/arch/arm/mach-omap2/mailbox.c
@@ -193,10 +193,12 @@ static void omap2_mbox_disable_irq(struct omap_mbox *mbox,
omap_mbox_type_t irq)
{
struct omap_mbox2_priv *p = mbox->priv;
- u32 l, bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit;
- l = mbox_read_reg(p->irqdisable);
- l &= ~bit;
- mbox_write_reg(l, p->irqdisable);
+ u32 bit = (irq == IRQ_TX) ? p->notfull_bit : p->newmsg_bit;
+
+ if (!cpu_is_omap44xx())
+ bit = mbox_read_reg(p->irqdisable) & ~bit;
+
+ mbox_write_reg(bit, p->irqdisable);
}
static void omap2_mbox_ack_irq(struct omap_mbox *mbox,
@@ -334,7 +336,7 @@ static struct omap_mbox mbox_iva_info = {
.priv = &omap2_mbox_iva_priv,
};
-struct omap_mbox *omap2_mboxes[] = { &mbox_iva_info, &mbox_dsp_info, NULL };
+struct omap_mbox *omap2_mboxes[] = { &mbox_dsp_info, &mbox_iva_info, NULL };
#endif
#if defined(CONFIG_ARCH_OMAP4)
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index 98148b6c36e9..6c84659cf846 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -605,7 +605,7 @@ static void __init omap_mux_dbg_create_entry(
list_for_each_entry(e, &partition->muxmodes, node) {
struct omap_mux *m = &e->mux;
- (void)debugfs_create_file(m->muxnames[0], S_IWUGO, mux_dbg_dir,
+ (void)debugfs_create_file(m->muxnames[0], S_IWUSR, mux_dbg_dir,
m, &omap_mux_dbg_signal_fops);
}
}
diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c
index 125f56591fb5..a5a83b358ddd 100644
--- a/arch/arm/mach-omap2/pm-debug.c
+++ b/arch/arm/mach-omap2/pm-debug.c
@@ -637,14 +637,14 @@ static int __init pm_dbg_init(void)
}
- (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUGO, d,
+ (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d,
&enable_off_mode, &pm_dbg_option_fops);
- (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUGO, d,
+ (void) debugfs_create_file("sleep_while_idle", S_IRUGO | S_IWUSR, d,
&sleep_while_idle, &pm_dbg_option_fops);
- (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUGO, d,
+ (void) debugfs_create_file("wakeup_timer_seconds", S_IRUGO | S_IWUSR, d,
&wakeup_timer_seconds, &pm_dbg_option_fops);
(void) debugfs_create_file("wakeup_timer_milliseconds",
- S_IRUGO | S_IWUGO, d, &wakeup_timer_milliseconds,
+ S_IRUGO | S_IWUSR, d, &wakeup_timer_milliseconds,
&pm_dbg_option_fops);
pm_dbg_init_done = 1;
diff --git a/arch/arm/mach-omap2/prcm_mpu44xx.h b/arch/arm/mach-omap2/prcm_mpu44xx.h
index 729a644ce852..3300ff6e3cfe 100644
--- a/arch/arm/mach-omap2/prcm_mpu44xx.h
+++ b/arch/arm/mach-omap2/prcm_mpu44xx.h
@@ -38,8 +38,8 @@
#define OMAP4430_PRCM_MPU_CPU1_INST 0x0800
/* PRCM_MPU clockdomain register offsets (from instance start) */
-#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0000
-#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0000
+#define OMAP4430_PRCM_MPU_CPU0_MPU_CDOFFS 0x0018
+#define OMAP4430_PRCM_MPU_CPU1_MPU_CDOFFS 0x0018
/*
diff --git a/arch/arm/mach-omap2/smartreflex.c b/arch/arm/mach-omap2/smartreflex.c
index c37e823266d3..1a777e34d0c2 100644
--- a/arch/arm/mach-omap2/smartreflex.c
+++ b/arch/arm/mach-omap2/smartreflex.c
@@ -282,6 +282,7 @@ error:
dev_err(&sr_info->pdev->dev, "%s: ERROR in registering"
"interrupt handler. Smartreflex will"
"not function as desired\n", __func__);
+ kfree(name);
kfree(sr_info);
return ret;
}
@@ -879,7 +880,7 @@ static int __init omap_sr_probe(struct platform_device *pdev)
ret = sr_late_init(sr_info);
if (ret) {
pr_warning("%s: Error in SR late init\n", __func__);
- return ret;
+ goto err_release_region;
}
}
@@ -890,17 +891,20 @@ static int __init omap_sr_probe(struct platform_device *pdev)
* not try to create rest of the debugfs entries.
*/
vdd_dbg_dir = omap_voltage_get_dbgdir(sr_info->voltdm);
- if (!vdd_dbg_dir)
- return -EINVAL;
+ if (!vdd_dbg_dir) {
+ ret = -EINVAL;
+ goto err_release_region;
+ }
dbg_dir = debugfs_create_dir("smartreflex", vdd_dbg_dir);
if (IS_ERR(dbg_dir)) {
dev_err(&pdev->dev, "%s: Unable to create debugfs directory\n",
__func__);
- return PTR_ERR(dbg_dir);
+ ret = PTR_ERR(dbg_dir);
+ goto err_release_region;
}
- (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUGO, dbg_dir,
+ (void) debugfs_create_file("autocomp", S_IRUGO | S_IWUSR, dbg_dir,
(void *)sr_info, &pm_sr_fops);
(void) debugfs_create_x32("errweight", S_IRUGO, dbg_dir,
&sr_info->err_weight);
@@ -913,7 +917,8 @@ static int __init omap_sr_probe(struct platform_device *pdev)
if (IS_ERR(nvalue_dir)) {
dev_err(&pdev->dev, "%s: Unable to create debugfs directory"
"for n-values\n", __func__);
- return PTR_ERR(nvalue_dir);
+ ret = PTR_ERR(nvalue_dir);
+ goto err_release_region;
}
omap_voltage_get_volttable(sr_info->voltdm, &volt_data);
@@ -922,24 +927,16 @@ static int __init omap_sr_probe(struct platform_device *pdev)
" corresponding vdd vdd_%s. Cannot create debugfs"
"entries for n-values\n",
__func__, sr_info->voltdm->name);
- return -ENODATA;
+ ret = -ENODATA;
+ goto err_release_region;
}
for (i = 0; i < sr_info->nvalue_count; i++) {
- char *name;
- char volt_name[32];
-
- name = kzalloc(NVALUE_NAME_LEN + 1, GFP_KERNEL);
- if (!name) {
- dev_err(&pdev->dev, "%s: Unable to allocate memory"
- " for n-value directory name\n", __func__);
- return -ENOMEM;
- }
+ char name[NVALUE_NAME_LEN + 1];
- strcpy(name, "volt_");
- sprintf(volt_name, "%d", volt_data[i].volt_nominal);
- strcat(name, volt_name);
- (void) debugfs_create_x32(name, S_IRUGO | S_IWUGO, nvalue_dir,
+ snprintf(name, sizeof(name), "volt_%d",
+ volt_data[i].volt_nominal);
+ (void) debugfs_create_x32(name, S_IRUGO | S_IWUSR, nvalue_dir,
&(sr_info->nvalue_table[i].nvalue));
}
diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c
index 7b7c2683ae7b..0fc550e7e482 100644
--- a/arch/arm/mach-omap2/timer-gp.c
+++ b/arch/arm/mach-omap2/timer-gp.c
@@ -39,6 +39,7 @@
#include <asm/mach/time.h>
#include <plat/dmtimer.h>
#include <asm/localtimer.h>
+#include <asm/sched_clock.h>
#include "timer-gp.h"
@@ -190,6 +191,7 @@ static void __init omap2_gp_clocksource_init(void)
/*
* clocksource
*/
+static DEFINE_CLOCK_DATA(cd);
static struct omap_dm_timer *gpt_clocksource;
static cycle_t clocksource_read_cycles(struct clocksource *cs)
{
@@ -204,6 +206,15 @@ static struct clocksource clocksource_gpt = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+static void notrace dmtimer_update_sched_clock(void)
+{
+ u32 cyc;
+
+ cyc = omap_dm_timer_read_counter(gpt_clocksource);
+
+ update_sched_clock(&cd, cyc, (u32)~0);
+}
+
/* Setup free-running counter for clocksource */
static void __init omap2_gp_clocksource_init(void)
{
@@ -224,6 +235,8 @@ static void __init omap2_gp_clocksource_init(void)
omap_dm_timer_set_load_start(gpt, 1, 0);
+ init_sched_clock(&cd, dmtimer_update_sched_clock, 32, tick_rate);
+
if (clocksource_register_hz(&clocksource_gpt, tick_rate))
printk(err2, clocksource_gpt.name);
}
diff --git a/arch/arm/mach-pxa/pxa25x.c b/arch/arm/mach-pxa/pxa25x.c
index fbc5b775f895..b166b1d845d7 100644
--- a/arch/arm/mach-pxa/pxa25x.c
+++ b/arch/arm/mach-pxa/pxa25x.c
@@ -347,6 +347,7 @@ static struct platform_device *pxa25x_devices[] __initdata = {
&pxa25x_device_assp,
&pxa25x_device_pwm0,
&pxa25x_device_pwm1,
+ &pxa_device_asoc_platform,
};
static struct sys_device pxa25x_sysdev[] = {
diff --git a/arch/arm/mach-pxa/tosa-bt.c b/arch/arm/mach-pxa/tosa-bt.c
index c31e601eb49c..b9b1e5c2b290 100644
--- a/arch/arm/mach-pxa/tosa-bt.c
+++ b/arch/arm/mach-pxa/tosa-bt.c
@@ -81,8 +81,6 @@ static int tosa_bt_probe(struct platform_device *dev)
goto err_rfk_alloc;
}
- rfkill_set_led_trigger_name(rfk, "tosa-bt");
-
rc = rfkill_register(rfk);
if (rc)
goto err_rfkill;
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index af152e70cfcf..f2582ec300d9 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -875,6 +875,11 @@ static struct platform_device sharpsl_rom_device = {
.dev.platform_data = &sharpsl_rom_data,
};
+static struct platform_device wm9712_device = {
+ .name = "wm9712-codec",
+ .id = -1,
+};
+
static struct platform_device *devices[] __initdata = {
&tosascoop_device,
&tosascoop_jc_device,
@@ -885,6 +890,7 @@ static struct platform_device *devices[] __initdata = {
&tosaled_device,
&tosa_bt_device,
&sharpsl_rom_device,
+ &wm9712_device,
};
static void tosa_poweroff(void)
diff --git a/arch/arm/mach-s3c2440/Kconfig b/arch/arm/mach-s3c2440/Kconfig
index a0cb2581894f..50825a3f91cc 100644
--- a/arch/arm/mach-s3c2440/Kconfig
+++ b/arch/arm/mach-s3c2440/Kconfig
@@ -99,6 +99,7 @@ config MACH_NEO1973_GTA02
select POWER_SUPPLY
select MACH_NEO1973
select S3C2410_PWM
+ select S3C_DEV_USB_HOST
help
Say Y here if you are using the Openmoko GTA02 / Freerunner GSM Phone
diff --git a/arch/arm/mach-s3c2440/include/mach/gta02.h b/arch/arm/mach-s3c2440/include/mach/gta02.h
index 953331d8d56a..3a56a229cac6 100644
--- a/arch/arm/mach-s3c2440/include/mach/gta02.h
+++ b/arch/arm/mach-s3c2440/include/mach/gta02.h
@@ -44,19 +44,19 @@
#define GTA02v3_GPIO_nUSB_FLT S3C2410_GPG(10) /* v3 + v4 only */
#define GTA02v3_GPIO_nGSM_OC S3C2410_GPG(11) /* v3 + v4 only */
-#define GTA02_GPIO_AMP_SHUT S3C2440_GPJ1 /* v2 + v3 + v4 only */
-#define GTA02v1_GPIO_WLAN_GPIO10 S3C2440_GPJ2
-#define GTA02_GPIO_HP_IN S3C2440_GPJ2 /* v2 + v3 + v4 only */
-#define GTA02_GPIO_INT0 S3C2440_GPJ3 /* v2 + v3 + v4 only */
-#define GTA02_GPIO_nGSM_EN S3C2440_GPJ4
-#define GTA02_GPIO_3D_RESET S3C2440_GPJ5
-#define GTA02_GPIO_nDL_GSM S3C2440_GPJ6 /* v4 + v5 only */
-#define GTA02_GPIO_WLAN_GPIO0 S3C2440_GPJ7
-#define GTA02v1_GPIO_BAT_ID S3C2440_GPJ8
-#define GTA02_GPIO_KEEPACT S3C2440_GPJ8
-#define GTA02v1_GPIO_HP_IN S3C2440_GPJ10
-#define GTA02_CHIP_PWD S3C2440_GPJ11 /* v2 + v3 + v4 only */
-#define GTA02_GPIO_nWLAN_RESET S3C2440_GPJ12 /* v2 + v3 + v4 only */
+#define GTA02_GPIO_AMP_SHUT S3C2410_GPJ(1) /* v2 + v3 + v4 only */
+#define GTA02v1_GPIO_WLAN_GPIO10 S3C2410_GPJ(2)
+#define GTA02_GPIO_HP_IN S3C2410_GPJ(2) /* v2 + v3 + v4 only */
+#define GTA02_GPIO_INT0 S3C2410_GPJ(3) /* v2 + v3 + v4 only */
+#define GTA02_GPIO_nGSM_EN S3C2410_GPJ(4)
+#define GTA02_GPIO_3D_RESET S3C2410_GPJ(5)
+#define GTA02_GPIO_nDL_GSM S3C2410_GPJ(6) /* v4 + v5 only */
+#define GTA02_GPIO_WLAN_GPIO0 S3C2410_GPJ(7)
+#define GTA02v1_GPIO_BAT_ID S3C2410_GPJ(8)
+#define GTA02_GPIO_KEEPACT S3C2410_GPJ(8)
+#define GTA02v1_GPIO_HP_IN S3C2410_GPJ(10)
+#define GTA02_CHIP_PWD S3C2410_GPJ(11) /* v2 + v3 + v4 only */
+#define GTA02_GPIO_nWLAN_RESET S3C2410_GPJ(12) /* v2 + v3 + v4 only */
#define GTA02_IRQ_GSENSOR_1 IRQ_EINT0
#define GTA02_IRQ_MODEM IRQ_EINT1
diff --git a/arch/arm/mach-s3c64xx/clock.c b/arch/arm/mach-s3c64xx/clock.c
index dd3782064508..fdfc4d5e37a1 100644
--- a/arch/arm/mach-s3c64xx/clock.c
+++ b/arch/arm/mach-s3c64xx/clock.c
@@ -151,6 +151,12 @@ static struct clk init_clocks_off[] = {
.enable = s3c64xx_pclk_ctrl,
.ctrlbit = S3C_CLKCON_PCLK_IIC,
}, {
+ .name = "i2c",
+ .id = 1,
+ .parent = &clk_p,
+ .enable = s3c64xx_pclk_ctrl,
+ .ctrlbit = S3C6410_CLKCON_PCLK_I2C1,
+ }, {
.name = "iis",
.id = 0,
.parent = &clk_p,
diff --git a/arch/arm/mach-s3c64xx/dma.c b/arch/arm/mach-s3c64xx/dma.c
index 135db1b41252..c35585cf8c4f 100644
--- a/arch/arm/mach-s3c64xx/dma.c
+++ b/arch/arm/mach-s3c64xx/dma.c
@@ -690,12 +690,12 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
regptr = regs + PL080_Cx_BASE(0);
- for (ch = 0; ch < 8; ch++, chno++, chptr++) {
- printk(KERN_INFO "%s: registering DMA %d (%p)\n",
- __func__, chno, regptr);
+ for (ch = 0; ch < 8; ch++, chptr++) {
+ pr_debug("%s: registering DMA %d (%p)\n",
+ __func__, chno + ch, regptr);
chptr->bit = 1 << ch;
- chptr->number = chno;
+ chptr->number = chno + ch;
chptr->dmac = dmac;
chptr->regs = regptr;
regptr += PL080_Cx_STRIDE;
@@ -704,7 +704,8 @@ static int s3c64xx_dma_init1(int chno, enum dma_ch chbase,
/* for the moment, permanently enable the controller */
writel(PL080_CONFIG_ENABLE, regs + PL080_CONFIG);
- printk(KERN_INFO "PL080: IRQ %d, at %p\n", irq, regs);
+ printk(KERN_INFO "PL080: IRQ %d, at %p, channels %d..%d\n",
+ irq, regs, chno, chno+8);
return 0;
diff --git a/arch/arm/mach-s3c64xx/gpiolib.c b/arch/arm/mach-s3c64xx/gpiolib.c
index fd99a82e82c4..92b09085caaa 100644
--- a/arch/arm/mach-s3c64xx/gpiolib.c
+++ b/arch/arm/mach-s3c64xx/gpiolib.c
@@ -72,7 +72,7 @@ static struct s3c_gpio_cfg gpio_4bit_cfg_eint0011 = {
.get_pull = s3c_gpio_getpull_updown,
};
-int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin)
+static int s3c64xx_gpio2int_gpm(struct gpio_chip *chip, unsigned pin)
{
return pin < 5 ? IRQ_EINT(23) + pin : -ENXIO;
}
@@ -138,7 +138,7 @@ static struct s3c_gpio_chip gpio_4bit[] = {
},
};
-int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin)
+static int s3c64xx_gpio2int_gpl(struct gpio_chip *chip, unsigned pin)
{
return pin >= 8 ? IRQ_EINT(16) + pin - 8 : -ENXIO;
}
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index e85192a86fbe..a80a3163dd30 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -28,6 +28,7 @@
#include <linux/delay.h>
#include <linux/smsc911x.h>
#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
#ifdef CONFIG_SMDK6410_WM1190_EV1
#include <linux/mfd/wm8350/core.h>
@@ -351,7 +352,7 @@ static struct regulator_init_data smdk6410_vddpll = {
/* VDD_UH_MMC, LDO5 on J5 */
static struct regulator_init_data smdk6410_vdduh_mmc = {
.constraints = {
- .name = "PVDD_UH/PVDD_MMC",
+ .name = "PVDD_UH+PVDD_MMC",
.always_on = 1,
},
};
@@ -417,7 +418,7 @@ static struct regulator_init_data smdk6410_vddaudio = {
/* S3C64xx internal logic & PLL */
static struct regulator_init_data wm8350_dcdc1_data = {
.constraints = {
- .name = "PVDD_INT/PVDD_PLL",
+ .name = "PVDD_INT+PVDD_PLL",
.min_uV = 1200000,
.max_uV = 1200000,
.always_on = 1,
@@ -452,7 +453,7 @@ static struct regulator_consumer_supply wm8350_dcdc4_consumers[] = {
static struct regulator_init_data wm8350_dcdc4_data = {
.constraints = {
- .name = "PVDD_HI/PVDD_EXT/PVDD_SYS/PVCCM2MTV",
+ .name = "PVDD_HI+PVDD_EXT+PVDD_SYS+PVCCM2MTV",
.min_uV = 3000000,
.max_uV = 3000000,
.always_on = 1,
@@ -464,7 +465,7 @@ static struct regulator_init_data wm8350_dcdc4_data = {
/* OTGi/1190-EV1 HPVDD & AVDD */
static struct regulator_init_data wm8350_ldo4_data = {
.constraints = {
- .name = "PVDD_OTGI/HPVDD/AVDD",
+ .name = "PVDD_OTGI+HPVDD+AVDD",
.min_uV = 1200000,
.max_uV = 1200000,
.apply_uV = 1,
@@ -552,7 +553,7 @@ static struct wm831x_backlight_pdata wm1192_backlight_pdata = {
static struct regulator_init_data wm1192_dcdc3 = {
.constraints = {
- .name = "PVDD_MEM/PVDD_GPS",
+ .name = "PVDD_MEM+PVDD_GPS",
.always_on = 1,
},
};
@@ -563,7 +564,7 @@ static struct regulator_consumer_supply wm1192_ldo1_consumers[] = {
static struct regulator_init_data wm1192_ldo1 = {
.constraints = {
- .name = "PVDD_LCD/PVDD_EXT",
+ .name = "PVDD_LCD+PVDD_EXT",
.always_on = 1,
},
.consumer_supplies = wm1192_ldo1_consumers,
diff --git a/arch/arm/mach-s3c64xx/setup-keypad.c b/arch/arm/mach-s3c64xx/setup-keypad.c
index f8ed0d22db70..1d4d0ee9e870 100644
--- a/arch/arm/mach-s3c64xx/setup-keypad.c
+++ b/arch/arm/mach-s3c64xx/setup-keypad.c
@@ -17,7 +17,7 @@
void samsung_keypad_cfg_gpio(unsigned int rows, unsigned int cols)
{
/* Set all the necessary GPK pins to special-function 3: KP_ROW[x] */
- s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), 8 + rows, S3C_GPIO_SFN(3));
+ s3c_gpio_cfgrange_nopull(S3C64XX_GPK(8), rows, S3C_GPIO_SFN(3));
/* Set all the necessary GPL pins to special-function 3: KP_COL[x] */
s3c_gpio_cfgrange_nopull(S3C64XX_GPL(0), cols, S3C_GPIO_SFN(3));
diff --git a/arch/arm/mach-s3c64xx/setup-sdhci.c b/arch/arm/mach-s3c64xx/setup-sdhci.c
index 1a942037c4ef..f344a222bc84 100644
--- a/arch/arm/mach-s3c64xx/setup-sdhci.c
+++ b/arch/arm/mach-s3c64xx/setup-sdhci.c
@@ -56,7 +56,7 @@ void s3c6400_setup_sdhci_cfg_card(struct platform_device *dev,
else
ctrl3 = (S3C_SDHCI_CTRL3_FCSEL1 | S3C_SDHCI_CTRL3_FCSEL0);
- printk(KERN_INFO "%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3);
+ pr_debug("%s: CTRL 2=%08x, 3=%08x\n", __func__, ctrl2, ctrl3);
writel(ctrl2, r + S3C_SDHCI_CONTROL2);
writel(ctrl3, r + S3C_SDHCI_CONTROL3);
}
diff --git a/arch/arm/mach-s5p6442/include/mach/map.h b/arch/arm/mach-s5p6442/include/mach/map.h
index 203dd5a18bd5..058dab4482a1 100644
--- a/arch/arm/mach-s5p6442/include/mach/map.h
+++ b/arch/arm/mach-s5p6442/include/mach/map.h
@@ -1,6 +1,6 @@
/* linux/arch/arm/mach-s5p6442/include/mach/map.h
*
- * Copyright (c) 2010 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* S5P6442 - Memory map definitions
@@ -16,56 +16,61 @@
#include <plat/map-base.h>
#include <plat/map-s5p.h>
-#define S5P6442_PA_CHIPID (0xE0000000)
-#define S5P_PA_CHIPID S5P6442_PA_CHIPID
+#define S5P6442_PA_SDRAM 0x20000000
-#define S5P6442_PA_SYSCON (0xE0100000)
-#define S5P_PA_SYSCON S5P6442_PA_SYSCON
+#define S5P6442_PA_I2S0 0xC0B00000
+#define S5P6442_PA_I2S1 0xF2200000
-#define S5P6442_PA_GPIO (0xE0200000)
+#define S5P6442_PA_CHIPID 0xE0000000
-#define S5P6442_PA_VIC0 (0xE4000000)
-#define S5P6442_PA_VIC1 (0xE4100000)
-#define S5P6442_PA_VIC2 (0xE4200000)
+#define S5P6442_PA_SYSCON 0xE0100000
-#define S5P6442_PA_SROMC (0xE7000000)
-#define S5P_PA_SROMC S5P6442_PA_SROMC
+#define S5P6442_PA_GPIO 0xE0200000
-#define S5P6442_PA_MDMA 0xE8000000
-#define S5P6442_PA_PDMA 0xE9000000
+#define S5P6442_PA_VIC0 0xE4000000
+#define S5P6442_PA_VIC1 0xE4100000
+#define S5P6442_PA_VIC2 0xE4200000
-#define S5P6442_PA_TIMER (0xEA000000)
-#define S5P_PA_TIMER S5P6442_PA_TIMER
+#define S5P6442_PA_SROMC 0xE7000000
-#define S5P6442_PA_SYSTIMER (0xEA100000)
+#define S5P6442_PA_MDMA 0xE8000000
+#define S5P6442_PA_PDMA 0xE9000000
-#define S5P6442_PA_WATCHDOG (0xEA200000)
+#define S5P6442_PA_TIMER 0xEA000000
-#define S5P6442_PA_UART (0xEC000000)
+#define S5P6442_PA_SYSTIMER 0xEA100000
-#define S5P_PA_UART0 (S5P6442_PA_UART + 0x0)
-#define S5P_PA_UART1 (S5P6442_PA_UART + 0x400)
-#define S5P_PA_UART2 (S5P6442_PA_UART + 0x800)
-#define S5P_SZ_UART SZ_256
+#define S5P6442_PA_WATCHDOG 0xEA200000
-#define S5P6442_PA_IIC0 (0xEC100000)
+#define S5P6442_PA_UART 0xEC000000
-#define S5P6442_PA_SDRAM (0x20000000)
-#define S5P_PA_SDRAM S5P6442_PA_SDRAM
+#define S5P6442_PA_IIC0 0xEC100000
#define S5P6442_PA_SPI 0xEC300000
-/* I2S */
-#define S5P6442_PA_I2S0 0xC0B00000
-#define S5P6442_PA_I2S1 0xF2200000
-
-/* PCM */
#define S5P6442_PA_PCM0 0xF2400000
#define S5P6442_PA_PCM1 0xF2500000
-/* compatibiltiy defines. */
+/* Compatibility Defines */
+
+#define S3C_PA_IIC S5P6442_PA_IIC0
#define S3C_PA_WDT S5P6442_PA_WATCHDOG
+
+#define S5P_PA_CHIPID S5P6442_PA_CHIPID
+#define S5P_PA_SDRAM S5P6442_PA_SDRAM
+#define S5P_PA_SROMC S5P6442_PA_SROMC
+#define S5P_PA_SYSCON S5P6442_PA_SYSCON
+#define S5P_PA_TIMER S5P6442_PA_TIMER
+
+/* UART */
+
#define S3C_PA_UART S5P6442_PA_UART
-#define S3C_PA_IIC S5P6442_PA_IIC0
+
+#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
+#define S5P_PA_UART0 S5P_PA_UART(0)
+#define S5P_PA_UART1 S5P_PA_UART(1)
+#define S5P_PA_UART2 S5P_PA_UART(2)
+
+#define S5P_SZ_UART SZ_256
#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5p64x0/include/mach/gpio.h b/arch/arm/mach-s5p64x0/include/mach/gpio.h
index 5486c8f01f1d..adb5f298ead8 100644
--- a/arch/arm/mach-s5p64x0/include/mach/gpio.h
+++ b/arch/arm/mach-s5p64x0/include/mach/gpio.h
@@ -23,7 +23,7 @@
#define S5P6440_GPIO_A_NR (6)
#define S5P6440_GPIO_B_NR (7)
#define S5P6440_GPIO_C_NR (8)
-#define S5P6440_GPIO_F_NR (2)
+#define S5P6440_GPIO_F_NR (16)
#define S5P6440_GPIO_G_NR (7)
#define S5P6440_GPIO_H_NR (10)
#define S5P6440_GPIO_I_NR (16)
@@ -36,7 +36,7 @@
#define S5P6450_GPIO_B_NR (7)
#define S5P6450_GPIO_C_NR (8)
#define S5P6450_GPIO_D_NR (8)
-#define S5P6450_GPIO_F_NR (2)
+#define S5P6450_GPIO_F_NR (16)
#define S5P6450_GPIO_G_NR (14)
#define S5P6450_GPIO_H_NR (10)
#define S5P6450_GPIO_I_NR (16)
diff --git a/arch/arm/mach-s5p64x0/include/mach/map.h b/arch/arm/mach-s5p64x0/include/mach/map.h
index a9365e5ba614..95c91257c7ca 100644
--- a/arch/arm/mach-s5p64x0/include/mach/map.h
+++ b/arch/arm/mach-s5p64x0/include/mach/map.h
@@ -1,6 +1,6 @@
/* linux/arch/arm/mach-s5p64x0/include/mach/map.h
*
- * Copyright (c) 2009-2010 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2009-2011 Samsung Electronics Co., Ltd.
* http://www.samsung.com
*
* S5P64X0 - Memory map definitions
@@ -16,64 +16,46 @@
#include <plat/map-base.h>
#include <plat/map-s5p.h>
-#define S5P64X0_PA_SDRAM (0x20000000)
+#define S5P64X0_PA_SDRAM 0x20000000
-#define S5P64X0_PA_CHIPID (0xE0000000)
-#define S5P_PA_CHIPID S5P64X0_PA_CHIPID
-
-#define S5P64X0_PA_SYSCON (0xE0100000)
-#define S5P_PA_SYSCON S5P64X0_PA_SYSCON
-
-#define S5P64X0_PA_GPIO (0xE0308000)
-
-#define S5P64X0_PA_VIC0 (0xE4000000)
-#define S5P64X0_PA_VIC1 (0xE4100000)
+#define S5P64X0_PA_CHIPID 0xE0000000
-#define S5P64X0_PA_SROMC (0xE7000000)
-#define S5P_PA_SROMC S5P64X0_PA_SROMC
-
-#define S5P64X0_PA_PDMA (0xE9000000)
-
-#define S5P64X0_PA_TIMER (0xEA000000)
-#define S5P_PA_TIMER S5P64X0_PA_TIMER
+#define S5P64X0_PA_SYSCON 0xE0100000
-#define S5P64X0_PA_RTC (0xEA100000)
+#define S5P64X0_PA_GPIO 0xE0308000
-#define S5P64X0_PA_WDT (0xEA200000)
+#define S5P64X0_PA_VIC0 0xE4000000
+#define S5P64X0_PA_VIC1 0xE4100000
-#define S5P6440_PA_UART(x) (0xEC000000 + ((x) * S3C_UART_OFFSET))
-#define S5P6450_PA_UART(x) ((x < 5) ? (0xEC800000 + ((x) * S3C_UART_OFFSET)) : (0xEC000000))
+#define S5P64X0_PA_SROMC 0xE7000000
-#define S5P_PA_UART0 S5P6450_PA_UART(0)
-#define S5P_PA_UART1 S5P6450_PA_UART(1)
-#define S5P_PA_UART2 S5P6450_PA_UART(2)
-#define S5P_PA_UART3 S5P6450_PA_UART(3)
-#define S5P_PA_UART4 S5P6450_PA_UART(4)
-#define S5P_PA_UART5 S5P6450_PA_UART(5)
+#define S5P64X0_PA_PDMA 0xE9000000
-#define S5P_SZ_UART SZ_256
+#define S5P64X0_PA_TIMER 0xEA000000
+#define S5P64X0_PA_RTC 0xEA100000
+#define S5P64X0_PA_WDT 0xEA200000
-#define S5P6440_PA_IIC0 (0xEC104000)
-#define S5P6440_PA_IIC1 (0xEC20F000)
-#define S5P6450_PA_IIC0 (0xEC100000)
-#define S5P6450_PA_IIC1 (0xEC200000)
+#define S5P6440_PA_IIC0 0xEC104000
+#define S5P6440_PA_IIC1 0xEC20F000
+#define S5P6450_PA_IIC0 0xEC100000
+#define S5P6450_PA_IIC1 0xEC200000
-#define S5P64X0_PA_SPI0 (0xEC400000)
-#define S5P64X0_PA_SPI1 (0xEC500000)
+#define S5P64X0_PA_SPI0 0xEC400000
+#define S5P64X0_PA_SPI1 0xEC500000
-#define S5P64X0_PA_HSOTG (0xED100000)
+#define S5P64X0_PA_HSOTG 0xED100000
#define S5P64X0_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000))
-#define S5P64X0_PA_I2S (0xF2000000)
+#define S5P64X0_PA_I2S 0xF2000000
#define S5P6450_PA_I2S1 0xF2800000
#define S5P6450_PA_I2S2 0xF2900000
-#define S5P64X0_PA_PCM (0xF2100000)
+#define S5P64X0_PA_PCM 0xF2100000
-#define S5P64X0_PA_ADC (0xF3000000)
+#define S5P64X0_PA_ADC 0xF3000000
-/* compatibiltiy defines. */
+/* Compatibility Defines */
#define S3C_PA_HSMMC0 S5P64X0_PA_HSMMC(0)
#define S3C_PA_HSMMC1 S5P64X0_PA_HSMMC(1)
@@ -83,6 +65,25 @@
#define S3C_PA_RTC S5P64X0_PA_RTC
#define S3C_PA_WDT S5P64X0_PA_WDT
+#define S5P_PA_CHIPID S5P64X0_PA_CHIPID
+#define S5P_PA_SROMC S5P64X0_PA_SROMC
+#define S5P_PA_SYSCON S5P64X0_PA_SYSCON
+#define S5P_PA_TIMER S5P64X0_PA_TIMER
+
#define SAMSUNG_PA_ADC S5P64X0_PA_ADC
+/* UART */
+
+#define S5P6440_PA_UART(x) (0xEC000000 + ((x) * S3C_UART_OFFSET))
+#define S5P6450_PA_UART(x) ((x < 5) ? (0xEC800000 + ((x) * S3C_UART_OFFSET)) : (0xEC000000))
+
+#define S5P_PA_UART0 S5P6450_PA_UART(0)
+#define S5P_PA_UART1 S5P6450_PA_UART(1)
+#define S5P_PA_UART2 S5P6450_PA_UART(2)
+#define S5P_PA_UART3 S5P6450_PA_UART(3)
+#define S5P_PA_UART4 S5P6450_PA_UART(4)
+#define S5P_PA_UART5 S5P6450_PA_UART(5)
+
+#define S5P_SZ_UART SZ_256
+
#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pc100/include/mach/map.h b/arch/arm/mach-s5pc100/include/mach/map.h
index 328467b346aa..ccbe6b767f7d 100644
--- a/arch/arm/mach-s5pc100/include/mach/map.h
+++ b/arch/arm/mach-s5pc100/include/mach/map.h
@@ -1,5 +1,8 @@
/* linux/arch/arm/mach-s5pc100/include/mach/map.h
*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ *
* Copyright 2009 Samsung Electronics Co.
* Byungho Min <bhmin@samsung.com>
*
@@ -16,145 +19,115 @@
#include <plat/map-base.h>
#include <plat/map-s5p.h>
-/*
- * map-base.h has already defined virtual memory address
- * S3C_VA_IRQ S3C_ADDR(0x00000000) irq controller(s)
- * S3C_VA_SYS S3C_ADDR(0x00100000) system control
- * S3C_VA_MEM S3C_ADDR(0x00200000) system control (not used)
- * S3C_VA_TIMER S3C_ADDR(0x00300000) timer block
- * S3C_VA_WATCHDOG S3C_ADDR(0x00400000) watchdog
- * S3C_VA_UART S3C_ADDR(0x01000000) UART
- *
- * S5PC100 specific virtual memory address can be defined here
- * S5PC1XX_VA_GPIO S3C_ADDR(0x00500000) GPIO
- *
- */
+#define S5PC100_PA_SDRAM 0x20000000
+
+#define S5PC100_PA_ONENAND 0xE7100000
+#define S5PC100_PA_ONENAND_BUF 0xB0000000
+
+#define S5PC100_PA_CHIPID 0xE0000000
-#define S5PC100_PA_ONENAND_BUF (0xB0000000)
-#define S5PC100_SZ_ONENAND_BUF (SZ_256M - SZ_32M)
+#define S5PC100_PA_SYSCON 0xE0100000
-/* Chip ID */
+#define S5PC100_PA_OTHERS 0xE0200000
-#define S5PC100_PA_CHIPID (0xE0000000)
-#define S5P_PA_CHIPID S5PC100_PA_CHIPID
+#define S5PC100_PA_GPIO 0xE0300000
-#define S5PC100_PA_SYSCON (0xE0100000)
-#define S5P_PA_SYSCON S5PC100_PA_SYSCON
+#define S5PC100_PA_VIC0 0xE4000000
+#define S5PC100_PA_VIC1 0xE4100000
+#define S5PC100_PA_VIC2 0xE4200000
-#define S5PC100_PA_OTHERS (0xE0200000)
-#define S5PC100_VA_OTHERS (S3C_VA_SYS + 0x10000)
+#define S5PC100_PA_SROMC 0xE7000000
-#define S5PC100_PA_GPIO (0xE0300000)
-#define S5PC1XX_VA_GPIO S3C_ADDR(0x00500000)
+#define S5PC100_PA_CFCON 0xE7800000
-/* Interrupt */
-#define S5PC100_PA_VIC0 (0xE4000000)
-#define S5PC100_PA_VIC1 (0xE4100000)
-#define S5PC100_PA_VIC2 (0xE4200000)
-#define S5PC100_VA_VIC S3C_VA_IRQ
-#define S5PC100_VA_VIC_OFFSET 0x10000
-#define S5PC1XX_VA_VIC(x) (S5PC100_VA_VIC + ((x) * S5PC100_VA_VIC_OFFSET))
+#define S5PC100_PA_MDMA 0xE8100000
+#define S5PC100_PA_PDMA0 0xE9000000
+#define S5PC100_PA_PDMA1 0xE9200000
-#define S5PC100_PA_SROMC (0xE7000000)
-#define S5P_PA_SROMC S5PC100_PA_SROMC
+#define S5PC100_PA_TIMER 0xEA000000
+#define S5PC100_PA_SYSTIMER 0xEA100000
+#define S5PC100_PA_WATCHDOG 0xEA200000
+#define S5PC100_PA_RTC 0xEA300000
-#define S5PC100_PA_ONENAND (0xE7100000)
+#define S5PC100_PA_UART 0xEC000000
-#define S5PC100_PA_CFCON (0xE7800000)
+#define S5PC100_PA_IIC0 0xEC100000
+#define S5PC100_PA_IIC1 0xEC200000
-/* DMA */
-#define S5PC100_PA_MDMA (0xE8100000)
-#define S5PC100_PA_PDMA0 (0xE9000000)
-#define S5PC100_PA_PDMA1 (0xE9200000)
+#define S5PC100_PA_SPI0 0xEC300000
+#define S5PC100_PA_SPI1 0xEC400000
+#define S5PC100_PA_SPI2 0xEC500000
-/* Timer */
-#define S5PC100_PA_TIMER (0xEA000000)
-#define S5P_PA_TIMER S5PC100_PA_TIMER
+#define S5PC100_PA_USB_HSOTG 0xED200000
+#define S5PC100_PA_USB_HSPHY 0xED300000
-#define S5PC100_PA_SYSTIMER (0xEA100000)
+#define S5PC100_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000))
-#define S5PC100_PA_WATCHDOG (0xEA200000)
-#define S5PC100_PA_RTC (0xEA300000)
+#define S5PC100_PA_FB 0xEE000000
-#define S5PC100_PA_UART (0xEC000000)
+#define S5PC100_PA_FIMC0 0xEE200000
+#define S5PC100_PA_FIMC1 0xEE300000
+#define S5PC100_PA_FIMC2 0xEE400000
-#define S5P_PA_UART0 (S5PC100_PA_UART + 0x0)
-#define S5P_PA_UART1 (S5PC100_PA_UART + 0x400)
-#define S5P_PA_UART2 (S5PC100_PA_UART + 0x800)
-#define S5P_PA_UART3 (S5PC100_PA_UART + 0xC00)
-#define S5P_SZ_UART SZ_256
+#define S5PC100_PA_I2S0 0xF2000000
+#define S5PC100_PA_I2S1 0xF2100000
+#define S5PC100_PA_I2S2 0xF2200000
-#define S5PC100_PA_IIC0 (0xEC100000)
-#define S5PC100_PA_IIC1 (0xEC200000)
+#define S5PC100_PA_AC97 0xF2300000
-/* SPI */
-#define S5PC100_PA_SPI0 0xEC300000
-#define S5PC100_PA_SPI1 0xEC400000
-#define S5PC100_PA_SPI2 0xEC500000
+#define S5PC100_PA_PCM0 0xF2400000
+#define S5PC100_PA_PCM1 0xF2500000
-/* USB HS OTG */
-#define S5PC100_PA_USB_HSOTG (0xED200000)
-#define S5PC100_PA_USB_HSPHY (0xED300000)
+#define S5PC100_PA_SPDIF 0xF2600000
-#define S5PC100_PA_FB (0xEE000000)
+#define S5PC100_PA_TSADC 0xF3000000
-#define S5PC100_PA_FIMC0 (0xEE200000)
-#define S5PC100_PA_FIMC1 (0xEE300000)
-#define S5PC100_PA_FIMC2 (0xEE400000)
+#define S5PC100_PA_KEYPAD 0xF3100000
-#define S5PC100_PA_I2S0 (0xF2000000)
-#define S5PC100_PA_I2S1 (0xF2100000)
-#define S5PC100_PA_I2S2 (0xF2200000)
+/* Compatibility Defines */
-#define S5PC100_PA_AC97 0xF2300000
+#define S3C_PA_FB S5PC100_PA_FB
+#define S3C_PA_HSMMC0 S5PC100_PA_HSMMC(0)
+#define S3C_PA_HSMMC1 S5PC100_PA_HSMMC(1)
+#define S3C_PA_HSMMC2 S5PC100_PA_HSMMC(2)
+#define S3C_PA_IIC S5PC100_PA_IIC0
+#define S3C_PA_IIC1 S5PC100_PA_IIC1
+#define S3C_PA_KEYPAD S5PC100_PA_KEYPAD
+#define S3C_PA_ONENAND S5PC100_PA_ONENAND
+#define S3C_PA_ONENAND_BUF S5PC100_PA_ONENAND_BUF
+#define S3C_PA_RTC S5PC100_PA_RTC
+#define S3C_PA_TSADC S5PC100_PA_TSADC
+#define S3C_PA_USB_HSOTG S5PC100_PA_USB_HSOTG
+#define S3C_PA_USB_HSPHY S5PC100_PA_USB_HSPHY
+#define S3C_PA_WDT S5PC100_PA_WATCHDOG
-/* PCM */
-#define S5PC100_PA_PCM0 0xF2400000
-#define S5PC100_PA_PCM1 0xF2500000
+#define S5P_PA_CHIPID S5PC100_PA_CHIPID
+#define S5P_PA_FIMC0 S5PC100_PA_FIMC0
+#define S5P_PA_FIMC1 S5PC100_PA_FIMC1
+#define S5P_PA_FIMC2 S5PC100_PA_FIMC2
+#define S5P_PA_SDRAM S5PC100_PA_SDRAM
+#define S5P_PA_SROMC S5PC100_PA_SROMC
+#define S5P_PA_SYSCON S5PC100_PA_SYSCON
+#define S5P_PA_TIMER S5PC100_PA_TIMER
-#define S5PC100_PA_SPDIF 0xF2600000
+#define SAMSUNG_PA_ADC S5PC100_PA_TSADC
+#define SAMSUNG_PA_CFCON S5PC100_PA_CFCON
+#define SAMSUNG_PA_KEYPAD S5PC100_PA_KEYPAD
-#define S5PC100_PA_TSADC (0xF3000000)
+#define S5PC100_VA_OTHERS (S3C_VA_SYS + 0x10000)
-/* KEYPAD */
-#define S5PC100_PA_KEYPAD (0xF3100000)
+#define S3C_SZ_ONENAND_BUF (SZ_256M - SZ_32M)
-#define S5PC100_PA_HSMMC(x) (0xED800000 + ((x) * 0x100000))
+/* UART */
-#define S5PC100_PA_SDRAM (0x20000000)
-#define S5P_PA_SDRAM S5PC100_PA_SDRAM
+#define S3C_PA_UART S5PC100_PA_UART
-/* compatibiltiy defines. */
-#define S3C_PA_UART S5PC100_PA_UART
-#define S3C_PA_IIC S5PC100_PA_IIC0
-#define S3C_PA_IIC1 S5PC100_PA_IIC1
-#define S3C_PA_FB S5PC100_PA_FB
-#define S3C_PA_G2D S5PC100_PA_G2D
-#define S3C_PA_G3D S5PC100_PA_G3D
-#define S3C_PA_JPEG S5PC100_PA_JPEG
-#define S3C_PA_ROTATOR S5PC100_PA_ROTATOR
-#define S5P_VA_VIC0 S5PC1XX_VA_VIC(0)
-#define S5P_VA_VIC1 S5PC1XX_VA_VIC(1)
-#define S5P_VA_VIC2 S5PC1XX_VA_VIC(2)
-#define S3C_PA_USB_HSOTG S5PC100_PA_USB_HSOTG
-#define S3C_PA_USB_HSPHY S5PC100_PA_USB_HSPHY
-#define S3C_PA_HSMMC0 S5PC100_PA_HSMMC(0)
-#define S3C_PA_HSMMC1 S5PC100_PA_HSMMC(1)
-#define S3C_PA_HSMMC2 S5PC100_PA_HSMMC(2)
-#define S3C_PA_KEYPAD S5PC100_PA_KEYPAD
-#define S3C_PA_WDT S5PC100_PA_WATCHDOG
-#define S3C_PA_TSADC S5PC100_PA_TSADC
-#define S3C_PA_ONENAND S5PC100_PA_ONENAND
-#define S3C_PA_ONENAND_BUF S5PC100_PA_ONENAND_BUF
-#define S3C_SZ_ONENAND_BUF S5PC100_SZ_ONENAND_BUF
-#define S3C_PA_RTC S5PC100_PA_RTC
-
-#define SAMSUNG_PA_ADC S5PC100_PA_TSADC
-#define SAMSUNG_PA_CFCON S5PC100_PA_CFCON
-#define SAMSUNG_PA_KEYPAD S5PC100_PA_KEYPAD
+#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
+#define S5P_PA_UART0 S5P_PA_UART(0)
+#define S5P_PA_UART1 S5P_PA_UART(1)
+#define S5P_PA_UART2 S5P_PA_UART(2)
+#define S5P_PA_UART3 S5P_PA_UART(3)
-#define S5P_PA_FIMC0 S5PC100_PA_FIMC0
-#define S5P_PA_FIMC1 S5PC100_PA_FIMC1
-#define S5P_PA_FIMC2 S5PC100_PA_FIMC2
+#define S5P_SZ_UART SZ_256
-#endif /* __ASM_ARCH_C100_MAP_H */
+#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pv210/include/mach/map.h b/arch/arm/mach-s5pv210/include/mach/map.h
index 3611492ad681..1dd58836fd4f 100644
--- a/arch/arm/mach-s5pv210/include/mach/map.h
+++ b/arch/arm/mach-s5pv210/include/mach/map.h
@@ -1,6 +1,6 @@
/* linux/arch/arm/mach-s5pv210/include/mach/map.h
*
- * Copyright (c) 2010 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* S5PV210 - Memory map definitions
@@ -16,122 +16,120 @@
#include <plat/map-base.h>
#include <plat/map-s5p.h>
-#define S5PV210_PA_SROM_BANK5 (0xA8000000)
+#define S5PV210_PA_SDRAM 0x20000000
-#define S5PC110_PA_ONENAND (0xB0000000)
-#define S5P_PA_ONENAND S5PC110_PA_ONENAND
+#define S5PV210_PA_SROM_BANK5 0xA8000000
-#define S5PC110_PA_ONENAND_DMA (0xB0600000)
-#define S5P_PA_ONENAND_DMA S5PC110_PA_ONENAND_DMA
+#define S5PC110_PA_ONENAND 0xB0000000
+#define S5PC110_PA_ONENAND_DMA 0xB0600000
-#define S5PV210_PA_CHIPID (0xE0000000)
-#define S5P_PA_CHIPID S5PV210_PA_CHIPID
+#define S5PV210_PA_CHIPID 0xE0000000
-#define S5PV210_PA_SYSCON (0xE0100000)
-#define S5P_PA_SYSCON S5PV210_PA_SYSCON
+#define S5PV210_PA_SYSCON 0xE0100000
-#define S5PV210_PA_GPIO (0xE0200000)
+#define S5PV210_PA_GPIO 0xE0200000
-/* SPI */
-#define S5PV210_PA_SPI0 0xE1300000
-#define S5PV210_PA_SPI1 0xE1400000
+#define S5PV210_PA_SPDIF 0xE1100000
-#define S5PV210_PA_KEYPAD (0xE1600000)
+#define S5PV210_PA_SPI0 0xE1300000
+#define S5PV210_PA_SPI1 0xE1400000
-#define S5PV210_PA_IIC0 (0xE1800000)
-#define S5PV210_PA_IIC1 (0xFAB00000)
-#define S5PV210_PA_IIC2 (0xE1A00000)
+#define S5PV210_PA_KEYPAD 0xE1600000
-#define S5PV210_PA_TIMER (0xE2500000)
-#define S5P_PA_TIMER S5PV210_PA_TIMER
+#define S5PV210_PA_ADC 0xE1700000
-#define S5PV210_PA_SYSTIMER (0xE2600000)
+#define S5PV210_PA_IIC0 0xE1800000
+#define S5PV210_PA_IIC1 0xFAB00000
+#define S5PV210_PA_IIC2 0xE1A00000
-#define S5PV210_PA_WATCHDOG (0xE2700000)
+#define S5PV210_PA_AC97 0xE2200000
-#define S5PV210_PA_RTC (0xE2800000)
-#define S5PV210_PA_UART (0xE2900000)
+#define S5PV210_PA_PCM0 0xE2300000
+#define S5PV210_PA_PCM1 0xE1200000
+#define S5PV210_PA_PCM2 0xE2B00000
-#define S5P_PA_UART0 (S5PV210_PA_UART + 0x0)
-#define S5P_PA_UART1 (S5PV210_PA_UART + 0x400)
-#define S5P_PA_UART2 (S5PV210_PA_UART + 0x800)
-#define S5P_PA_UART3 (S5PV210_PA_UART + 0xC00)
+#define S5PV210_PA_TIMER 0xE2500000
+#define S5PV210_PA_SYSTIMER 0xE2600000
+#define S5PV210_PA_WATCHDOG 0xE2700000
+#define S5PV210_PA_RTC 0xE2800000
-#define S5P_SZ_UART SZ_256
+#define S5PV210_PA_UART 0xE2900000
-#define S3C_VA_UARTx(x) (S3C_VA_UART + ((x) * S3C_UART_OFFSET))
+#define S5PV210_PA_SROMC 0xE8000000
-#define S5PV210_PA_SROMC (0xE8000000)
-#define S5P_PA_SROMC S5PV210_PA_SROMC
+#define S5PV210_PA_CFCON 0xE8200000
-#define S5PV210_PA_CFCON (0xE8200000)
+#define S5PV210_PA_HSMMC(x) (0xEB000000 + ((x) * 0x100000))
-#define S5PV210_PA_MDMA 0xFA200000
-#define S5PV210_PA_PDMA0 0xE0900000
-#define S5PV210_PA_PDMA1 0xE0A00000
+#define S5PV210_PA_HSOTG 0xEC000000
+#define S5PV210_PA_HSPHY 0xEC100000
-#define S5PV210_PA_FB (0xF8000000)
+#define S5PV210_PA_IIS0 0xEEE30000
+#define S5PV210_PA_IIS1 0xE2100000
+#define S5PV210_PA_IIS2 0xE2A00000
-#define S5PV210_PA_FIMC0 (0xFB200000)
-#define S5PV210_PA_FIMC1 (0xFB300000)
-#define S5PV210_PA_FIMC2 (0xFB400000)
+#define S5PV210_PA_DMC0 0xF0000000
+#define S5PV210_PA_DMC1 0xF1400000
-#define S5PV210_PA_HSMMC(x) (0xEB000000 + ((x) * 0x100000))
+#define S5PV210_PA_VIC0 0xF2000000
+#define S5PV210_PA_VIC1 0xF2100000
+#define S5PV210_PA_VIC2 0xF2200000
+#define S5PV210_PA_VIC3 0xF2300000
-#define S5PV210_PA_HSOTG (0xEC000000)
-#define S5PV210_PA_HSPHY (0xEC100000)
+#define S5PV210_PA_FB 0xF8000000
-#define S5PV210_PA_VIC0 (0xF2000000)
-#define S5PV210_PA_VIC1 (0xF2100000)
-#define S5PV210_PA_VIC2 (0xF2200000)
-#define S5PV210_PA_VIC3 (0xF2300000)
+#define S5PV210_PA_MDMA 0xFA200000
+#define S5PV210_PA_PDMA0 0xE0900000
+#define S5PV210_PA_PDMA1 0xE0A00000
-#define S5PV210_PA_SDRAM (0x20000000)
-#define S5P_PA_SDRAM S5PV210_PA_SDRAM
+#define S5PV210_PA_MIPI_CSIS 0xFA600000
-/* S/PDIF */
-#define S5PV210_PA_SPDIF 0xE1100000
+#define S5PV210_PA_FIMC0 0xFB200000
+#define S5PV210_PA_FIMC1 0xFB300000
+#define S5PV210_PA_FIMC2 0xFB400000
-/* I2S */
-#define S5PV210_PA_IIS0 0xEEE30000
-#define S5PV210_PA_IIS1 0xE2100000
-#define S5PV210_PA_IIS2 0xE2A00000
+/* Compatibility Defines */
-/* PCM */
-#define S5PV210_PA_PCM0 0xE2300000
-#define S5PV210_PA_PCM1 0xE1200000
-#define S5PV210_PA_PCM2 0xE2B00000
+#define S3C_PA_FB S5PV210_PA_FB
+#define S3C_PA_HSMMC0 S5PV210_PA_HSMMC(0)
+#define S3C_PA_HSMMC1 S5PV210_PA_HSMMC(1)
+#define S3C_PA_HSMMC2 S5PV210_PA_HSMMC(2)
+#define S3C_PA_HSMMC3 S5PV210_PA_HSMMC(3)
+#define S3C_PA_IIC S5PV210_PA_IIC0
+#define S3C_PA_IIC1 S5PV210_PA_IIC1
+#define S3C_PA_IIC2 S5PV210_PA_IIC2
+#define S3C_PA_RTC S5PV210_PA_RTC
+#define S3C_PA_USB_HSOTG S5PV210_PA_HSOTG
+#define S3C_PA_WDT S5PV210_PA_WATCHDOG
-/* AC97 */
-#define S5PV210_PA_AC97 0xE2200000
+#define S5P_PA_CHIPID S5PV210_PA_CHIPID
+#define S5P_PA_FIMC0 S5PV210_PA_FIMC0
+#define S5P_PA_FIMC1 S5PV210_PA_FIMC1
+#define S5P_PA_FIMC2 S5PV210_PA_FIMC2
+#define S5P_PA_MIPI_CSIS0 S5PV210_PA_MIPI_CSIS
+#define S5P_PA_ONENAND S5PC110_PA_ONENAND
+#define S5P_PA_ONENAND_DMA S5PC110_PA_ONENAND_DMA
+#define S5P_PA_SDRAM S5PV210_PA_SDRAM
+#define S5P_PA_SROMC S5PV210_PA_SROMC
+#define S5P_PA_SYSCON S5PV210_PA_SYSCON
+#define S5P_PA_TIMER S5PV210_PA_TIMER
-#define S5PV210_PA_ADC (0xE1700000)
+#define SAMSUNG_PA_ADC S5PV210_PA_ADC
+#define SAMSUNG_PA_CFCON S5PV210_PA_CFCON
+#define SAMSUNG_PA_KEYPAD S5PV210_PA_KEYPAD
-#define S5PV210_PA_DMC0 (0xF0000000)
-#define S5PV210_PA_DMC1 (0xF1400000)
+/* UART */
-#define S5PV210_PA_MIPI_CSIS 0xFA600000
+#define S3C_VA_UARTx(x) (S3C_VA_UART + ((x) * S3C_UART_OFFSET))
-/* compatibiltiy defines. */
-#define S3C_PA_UART S5PV210_PA_UART
-#define S3C_PA_HSMMC0 S5PV210_PA_HSMMC(0)
-#define S3C_PA_HSMMC1 S5PV210_PA_HSMMC(1)
-#define S3C_PA_HSMMC2 S5PV210_PA_HSMMC(2)
-#define S3C_PA_HSMMC3 S5PV210_PA_HSMMC(3)
-#define S3C_PA_IIC S5PV210_PA_IIC0
-#define S3C_PA_IIC1 S5PV210_PA_IIC1
-#define S3C_PA_IIC2 S5PV210_PA_IIC2
-#define S3C_PA_FB S5PV210_PA_FB
-#define S3C_PA_RTC S5PV210_PA_RTC
-#define S3C_PA_WDT S5PV210_PA_WATCHDOG
-#define S3C_PA_USB_HSOTG S5PV210_PA_HSOTG
-#define S5P_PA_FIMC0 S5PV210_PA_FIMC0
-#define S5P_PA_FIMC1 S5PV210_PA_FIMC1
-#define S5P_PA_FIMC2 S5PV210_PA_FIMC2
-#define S5P_PA_MIPI_CSIS0 S5PV210_PA_MIPI_CSIS
+#define S3C_PA_UART S5PV210_PA_UART
-#define SAMSUNG_PA_ADC S5PV210_PA_ADC
-#define SAMSUNG_PA_CFCON S5PV210_PA_CFCON
-#define SAMSUNG_PA_KEYPAD S5PV210_PA_KEYPAD
+#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
+#define S5P_PA_UART0 S5P_PA_UART(0)
+#define S5P_PA_UART1 S5P_PA_UART(1)
+#define S5P_PA_UART2 S5P_PA_UART(2)
+#define S5P_PA_UART3 S5P_PA_UART(3)
+
+#define S5P_SZ_UART SZ_256
#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-s5pv210/mach-aquila.c b/arch/arm/mach-s5pv210/mach-aquila.c
index 461aa035afc0..557add4fc56c 100644
--- a/arch/arm/mach-s5pv210/mach-aquila.c
+++ b/arch/arm/mach-s5pv210/mach-aquila.c
@@ -149,7 +149,7 @@ static struct regulator_init_data aquila_ldo2_data = {
static struct regulator_init_data aquila_ldo3_data = {
.constraints = {
- .name = "VUSB/MIPI_1.1V",
+ .name = "VUSB+MIPI_1.1V",
.min_uV = 1100000,
.max_uV = 1100000,
.apply_uV = 1,
@@ -197,7 +197,7 @@ static struct regulator_init_data aquila_ldo7_data = {
static struct regulator_init_data aquila_ldo8_data = {
.constraints = {
- .name = "VUSB/VADC_3.3V",
+ .name = "VUSB+VADC_3.3V",
.min_uV = 3300000,
.max_uV = 3300000,
.apply_uV = 1,
@@ -207,7 +207,7 @@ static struct regulator_init_data aquila_ldo8_data = {
static struct regulator_init_data aquila_ldo9_data = {
.constraints = {
- .name = "VCC/VCAM_2.8V",
+ .name = "VCC+VCAM_2.8V",
.min_uV = 2800000,
.max_uV = 2800000,
.apply_uV = 1,
@@ -381,9 +381,12 @@ static struct max8998_platform_data aquila_max8998_pdata = {
.buck1_set1 = S5PV210_GPH0(3),
.buck1_set2 = S5PV210_GPH0(4),
.buck2_set3 = S5PV210_GPH0(5),
- .buck1_max_voltage1 = 1200000,
- .buck1_max_voltage2 = 1200000,
- .buck2_max_voltage = 1200000,
+ .buck1_voltage1 = 1200000,
+ .buck1_voltage2 = 1200000,
+ .buck1_voltage3 = 1200000,
+ .buck1_voltage4 = 1200000,
+ .buck2_voltage1 = 1200000,
+ .buck2_voltage2 = 1200000,
};
#endif
diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index e22d5112fd44..056f5c769b0a 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -288,7 +288,7 @@ static struct regulator_init_data goni_ldo2_data = {
static struct regulator_init_data goni_ldo3_data = {
.constraints = {
- .name = "VUSB/MIPI_1.1V",
+ .name = "VUSB+MIPI_1.1V",
.min_uV = 1100000,
.max_uV = 1100000,
.apply_uV = 1,
@@ -337,7 +337,7 @@ static struct regulator_init_data goni_ldo7_data = {
static struct regulator_init_data goni_ldo8_data = {
.constraints = {
- .name = "VUSB/VADC_3.3V",
+ .name = "VUSB+VADC_3.3V",
.min_uV = 3300000,
.max_uV = 3300000,
.apply_uV = 1,
@@ -347,7 +347,7 @@ static struct regulator_init_data goni_ldo8_data = {
static struct regulator_init_data goni_ldo9_data = {
.constraints = {
- .name = "VCC/VCAM_2.8V",
+ .name = "VCC+VCAM_2.8V",
.min_uV = 2800000,
.max_uV = 2800000,
.apply_uV = 1,
@@ -521,9 +521,12 @@ static struct max8998_platform_data goni_max8998_pdata = {
.buck1_set1 = S5PV210_GPH0(3),
.buck1_set2 = S5PV210_GPH0(4),
.buck2_set3 = S5PV210_GPH0(5),
- .buck1_max_voltage1 = 1200000,
- .buck1_max_voltage2 = 1200000,
- .buck2_max_voltage = 1200000,
+ .buck1_voltage1 = 1200000,
+ .buck1_voltage2 = 1200000,
+ .buck1_voltage3 = 1200000,
+ .buck1_voltage4 = 1200000,
+ .buck2_voltage1 = 1200000,
+ .buck2_voltage2 = 1200000,
};
#endif
diff --git a/arch/arm/mach-s5pv310/include/mach/map.h b/arch/arm/mach-s5pv310/include/mach/map.h
index 3060f78e12ab..901657fa7a12 100644
--- a/arch/arm/mach-s5pv310/include/mach/map.h
+++ b/arch/arm/mach-s5pv310/include/mach/map.h
@@ -1,6 +1,6 @@
/* linux/arch/arm/mach-s5pv310/include/mach/map.h
*
- * Copyright (c) 2010 Samsung Electronics Co., Ltd.
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*
* S5PV310 - Memory map definitions
@@ -23,90 +23,43 @@
#include <plat/map-s5p.h>
-#define S5PV310_PA_SYSRAM (0x02025000)
+#define S5PV310_PA_SYSRAM 0x02025000
-#define S5PV310_PA_SROM_BANK(x) (0x04000000 + ((x) * 0x01000000))
-
-#define S5PC210_PA_ONENAND (0x0C000000)
-#define S5P_PA_ONENAND S5PC210_PA_ONENAND
-
-#define S5PC210_PA_ONENAND_DMA (0x0C600000)
-#define S5P_PA_ONENAND_DMA S5PC210_PA_ONENAND_DMA
-
-#define S5PV310_PA_CHIPID (0x10000000)
-#define S5P_PA_CHIPID S5PV310_PA_CHIPID
-
-#define S5PV310_PA_SYSCON (0x10010000)
-#define S5P_PA_SYSCON S5PV310_PA_SYSCON
+#define S5PV310_PA_I2S0 0x03830000
+#define S5PV310_PA_I2S1 0xE3100000
+#define S5PV310_PA_I2S2 0xE2A00000
-#define S5PV310_PA_PMU (0x10020000)
+#define S5PV310_PA_PCM0 0x03840000
+#define S5PV310_PA_PCM1 0x13980000
+#define S5PV310_PA_PCM2 0x13990000
-#define S5PV310_PA_CMU (0x10030000)
-
-#define S5PV310_PA_WATCHDOG (0x10060000)
-#define S5PV310_PA_RTC (0x10070000)
-
-#define S5PV310_PA_DMC0 (0x10400000)
-
-#define S5PV310_PA_COMBINER (0x10448000)
-
-#define S5PV310_PA_COREPERI (0x10500000)
-#define S5PV310_PA_GIC_CPU (0x10500100)
-#define S5PV310_PA_TWD (0x10500600)
-#define S5PV310_PA_GIC_DIST (0x10501000)
-#define S5PV310_PA_L2CC (0x10502000)
-
-/* DMA */
-#define S5PV310_PA_MDMA 0x10810000
-#define S5PV310_PA_PDMA0 0x12680000
-#define S5PV310_PA_PDMA1 0x12690000
-
-#define S5PV310_PA_GPIO1 (0x11400000)
-#define S5PV310_PA_GPIO2 (0x11000000)
-#define S5PV310_PA_GPIO3 (0x03860000)
-
-#define S5PV310_PA_MIPI_CSIS0 0x11880000
-#define S5PV310_PA_MIPI_CSIS1 0x11890000
+#define S5PV310_PA_SROM_BANK(x) (0x04000000 + ((x) * 0x01000000))
-#define S5PV310_PA_HSMMC(x) (0x12510000 + ((x) * 0x10000))
+#define S5PC210_PA_ONENAND 0x0C000000
+#define S5PC210_PA_ONENAND_DMA 0x0C600000
-#define S5PV310_PA_SROMC (0x12570000)
-#define S5P_PA_SROMC S5PV310_PA_SROMC
+#define S5PV310_PA_CHIPID 0x10000000
-/* S/PDIF */
-#define S5PV310_PA_SPDIF 0xE1100000
+#define S5PV310_PA_SYSCON 0x10010000
+#define S5PV310_PA_PMU 0x10020000
+#define S5PV310_PA_CMU 0x10030000
-/* I2S */
-#define S5PV310_PA_I2S0 0x03830000
-#define S5PV310_PA_I2S1 0xE3100000
-#define S5PV310_PA_I2S2 0xE2A00000
+#define S5PV310_PA_WATCHDOG 0x10060000
+#define S5PV310_PA_RTC 0x10070000
-/* PCM */
-#define S5PV310_PA_PCM0 0x03840000
-#define S5PV310_PA_PCM1 0x13980000
-#define S5PV310_PA_PCM2 0x13990000
+#define S5PV310_PA_DMC0 0x10400000
-/* AC97 */
-#define S5PV310_PA_AC97 0x139A0000
+#define S5PV310_PA_COMBINER 0x10448000
-#define S5PV310_PA_UART (0x13800000)
+#define S5PV310_PA_COREPERI 0x10500000
+#define S5PV310_PA_GIC_CPU 0x10500100
+#define S5PV310_PA_TWD 0x10500600
+#define S5PV310_PA_GIC_DIST 0x10501000
+#define S5PV310_PA_L2CC 0x10502000
-#define S5P_PA_UART(x) (S5PV310_PA_UART + ((x) * S3C_UART_OFFSET))
-#define S5P_PA_UART0 S5P_PA_UART(0)
-#define S5P_PA_UART1 S5P_PA_UART(1)
-#define S5P_PA_UART2 S5P_PA_UART(2)
-#define S5P_PA_UART3 S5P_PA_UART(3)
-#define S5P_PA_UART4 S5P_PA_UART(4)
-
-#define S5P_SZ_UART SZ_256
-
-#define S5PV310_PA_IIC(x) (0x13860000 + ((x) * 0x10000))
-
-#define S5PV310_PA_TIMER (0x139D0000)
-#define S5P_PA_TIMER S5PV310_PA_TIMER
-
-#define S5PV310_PA_SDRAM (0x40000000)
-#define S5P_PA_SDRAM S5PV310_PA_SDRAM
+#define S5PV310_PA_MDMA 0x10810000
+#define S5PV310_PA_PDMA0 0x12680000
+#define S5PV310_PA_PDMA1 0x12690000
#define S5PV310_PA_SYSMMU_MDMA 0x10A40000
#define S5PV310_PA_SYSMMU_SSS 0x10A50000
@@ -125,8 +78,31 @@
#define S5PV310_PA_SYSMMU_MFC_L 0x13620000
#define S5PV310_PA_SYSMMU_MFC_R 0x13630000
-/* compatibiltiy defines. */
-#define S3C_PA_UART S5PV310_PA_UART
+#define S5PV310_PA_GPIO1 0x11400000
+#define S5PV310_PA_GPIO2 0x11000000
+#define S5PV310_PA_GPIO3 0x03860000
+
+#define S5PV310_PA_MIPI_CSIS0 0x11880000
+#define S5PV310_PA_MIPI_CSIS1 0x11890000
+
+#define S5PV310_PA_HSMMC(x) (0x12510000 + ((x) * 0x10000))
+
+#define S5PV310_PA_SROMC 0x12570000
+
+#define S5PV310_PA_UART 0x13800000
+
+#define S5PV310_PA_IIC(x) (0x13860000 + ((x) * 0x10000))
+
+#define S5PV310_PA_AC97 0x139A0000
+
+#define S5PV310_PA_TIMER 0x139D0000
+
+#define S5PV310_PA_SDRAM 0x40000000
+
+#define S5PV310_PA_SPDIF 0xE1100000
+
+/* Compatibility Defines */
+
#define S3C_PA_HSMMC0 S5PV310_PA_HSMMC(0)
#define S3C_PA_HSMMC1 S5PV310_PA_HSMMC(1)
#define S3C_PA_HSMMC2 S5PV310_PA_HSMMC(2)
@@ -141,7 +117,28 @@
#define S3C_PA_IIC7 S5PV310_PA_IIC(7)
#define S3C_PA_RTC S5PV310_PA_RTC
#define S3C_PA_WDT S5PV310_PA_WATCHDOG
+
+#define S5P_PA_CHIPID S5PV310_PA_CHIPID
#define S5P_PA_MIPI_CSIS0 S5PV310_PA_MIPI_CSIS0
#define S5P_PA_MIPI_CSIS1 S5PV310_PA_MIPI_CSIS1
+#define S5P_PA_ONENAND S5PC210_PA_ONENAND
+#define S5P_PA_ONENAND_DMA S5PC210_PA_ONENAND_DMA
+#define S5P_PA_SDRAM S5PV310_PA_SDRAM
+#define S5P_PA_SROMC S5PV310_PA_SROMC
+#define S5P_PA_SYSCON S5PV310_PA_SYSCON
+#define S5P_PA_TIMER S5PV310_PA_TIMER
+
+/* UART */
+
+#define S3C_PA_UART S5PV310_PA_UART
+
+#define S5P_PA_UART(x) (S3C_PA_UART + ((x) * S3C_UART_OFFSET))
+#define S5P_PA_UART0 S5P_PA_UART(0)
+#define S5P_PA_UART1 S5P_PA_UART(1)
+#define S5P_PA_UART2 S5P_PA_UART(2)
+#define S5P_PA_UART3 S5P_PA_UART(3)
+#define S5P_PA_UART4 S5P_PA_UART(4)
+
+#define S5P_SZ_UART SZ_256
#endif /* __ASM_ARCH_MAP_H */
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
index 2123b96b5638..4303a86e6e38 100644
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ b/arch/arm/mach-shmobile/board-ag5evm.c
@@ -454,6 +454,7 @@ static void __init ag5evm_init(void)
gpio_direction_output(GPIO_PORT217, 0);
mdelay(1);
gpio_set_value(GPIO_PORT217, 1);
+ mdelay(100);
/* LCD backlight controller */
gpio_request(GPIO_PORT235, NULL); /* RESET */
diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c
index 3cf0951caa2d..81d6536552a9 100644
--- a/arch/arm/mach-shmobile/board-ap4evb.c
+++ b/arch/arm/mach-shmobile/board-ap4evb.c
@@ -1303,7 +1303,7 @@ static void __init ap4evb_init(void)
lcdc_info.clock_source = LCDC_CLK_BUS;
lcdc_info.ch[0].interface_type = RGB18;
- lcdc_info.ch[0].clock_divider = 2;
+ lcdc_info.ch[0].clock_divider = 3;
lcdc_info.ch[0].flags = 0;
lcdc_info.ch[0].lcd_size_cfg.width = 152;
lcdc_info.ch[0].lcd_size_cfg.height = 91;
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index fb4213a4e15a..1657eac5dde2 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -303,7 +303,7 @@ static struct sh_mobile_lcdc_info lcdc_info = {
.lcd_cfg = mackerel_lcdc_modes,
.num_cfg = ARRAY_SIZE(mackerel_lcdc_modes),
.interface_type = RGB24,
- .clock_divider = 2,
+ .clock_divider = 3,
.flags = 0,
.lcd_size_cfg.width = 152,
.lcd_size_cfg.height = 91,
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index ddd4a1b775f0..7e58904c1c8c 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -263,7 +263,7 @@ static struct clk div6_clks[DIV6_NR] = {
};
enum { MSTP001,
- MSTP125, MSTP118, MSTP116, MSTP100,
+ MSTP129, MSTP128, MSTP127, MSTP126, MSTP125, MSTP118, MSTP116, MSTP100,
MSTP219,
MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
MSTP331, MSTP329, MSTP325, MSTP323, MSTP312,
@@ -275,6 +275,10 @@ enum { MSTP001,
static struct clk mstp_clks[MSTP_NR] = {
[MSTP001] = MSTP(&div4_clks[DIV4_HP], SMSTPCR0, 1, 0), /* IIC2 */
+ [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* CEU1 */
+ [MSTP128] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 28, 0), /* CSI2-RX1 */
+ [MSTP127] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 27, 0), /* CEU0 */
+ [MSTP126] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 26, 0), /* CSI2-RX0 */
[MSTP125] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 25, 0), /* TMU0 */
[MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX0 */
[MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */
@@ -306,6 +310,9 @@ static struct clk_lookup lookups[] = {
CLKDEV_CON_ID("r_clk", &r_clk),
/* DIV6 clocks */
+ CLKDEV_CON_ID("vck1_clk", &div6_clks[DIV6_VCK1]),
+ CLKDEV_CON_ID("vck2_clk", &div6_clks[DIV6_VCK2]),
+ CLKDEV_CON_ID("vck3_clk", &div6_clks[DIV6_VCK3]),
CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSIT]),
CLKDEV_ICK_ID("dsit_clk", "sh-mipi-dsi.1", &div6_clks[DIV6_DSIT]),
CLKDEV_ICK_ID("dsi0p_clk", "sh-mipi-dsi.0", &div6_clks[DIV6_DSI0P]),
@@ -313,11 +320,15 @@ static struct clk_lookup lookups[] = {
/* MSTP32 clocks */
CLKDEV_DEV_ID("i2c-sh_mobile.2", &mstp_clks[MSTP001]), /* I2C2 */
- CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */
+ CLKDEV_DEV_ID("sh_mobile_ceu.1", &mstp_clks[MSTP129]), /* CEU1 */
+ CLKDEV_DEV_ID("sh-mobile-csi2.1", &mstp_clks[MSTP128]), /* CSI2-RX1 */
+ CLKDEV_DEV_ID("sh_mobile_ceu.0", &mstp_clks[MSTP127]), /* CEU0 */
+ CLKDEV_DEV_ID("sh-mobile-csi2.0", &mstp_clks[MSTP126]), /* CSI2-RX0 */
CLKDEV_DEV_ID("sh_tmu.0", &mstp_clks[MSTP125]), /* TMU00 */
CLKDEV_DEV_ID("sh_tmu.1", &mstp_clks[MSTP125]), /* TMU01 */
- CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */
+ CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
+ CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */
CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */
CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */
diff --git a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-ap4evb.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
EW 0xE6020004, 0xA500
EW 0xE6030004, 0xA500
-DD 0x01001000, 0x01001000
-
LIST "GPIO Setting"
EB 0xE6051013, 0xA2
LIST "CPG"
-ED 0xE6150080, 0x00000180
ED 0xE61500C0, 0x00000002
WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
WAIT 1, 0xFE40009C
+LIST "SUB/USBClk"
+ED 0xE6150080, 0x00000180
+
LIST "BSC"
ED 0xFEC10000, 0x00E0001B
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
ED 0xFE40004C, 0x00110209
ED 0xFE400010, 0x00000087
-WAIT 10, 0xFE40009C
+WAIT 30, 0xFE40009C
ED 0xFE400084, 0x0000003F
EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
WAIT 1, 0xFE40009C
-ED 0xE6150354, 0x00000002
+ED 0xFE400354, 0x01AD8002
LIST "SCIF0 - Serial port for earlyprintk"
EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
index efd3687ba190..3029aba38688 100644
--- a/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
+++ b/arch/arm/mach-shmobile/include/mach/head-mackerel.txt
@@ -6,13 +6,10 @@ LIST "RWT Setting"
EW 0xE6020004, 0xA500
EW 0xE6030004, 0xA500
-DD 0x01001000, 0x01001000
-
LIST "GPIO Setting"
EB 0xE6051013, 0xA2
LIST "CPG"
-ED 0xE6150080, 0x00000180
ED 0xE61500C0, 0x00000002
WAIT 1, 0xFE40009C
@@ -37,6 +34,9 @@ ED 0xE615002C, 0x93000040
WAIT 1, 0xFE40009C
+LIST "SUB/USBClk"
+ED 0xE6150080, 0x00000180
+
LIST "BSC"
ED 0xFEC10000, 0x00E0001B
@@ -53,7 +53,7 @@ ED 0xFE400048, 0x20C18505
ED 0xFE40004C, 0x00110209
ED 0xFE400010, 0x00000087
-WAIT 10, 0xFE40009C
+WAIT 30, 0xFE40009C
ED 0xFE400084, 0x0000003F
EB 0xFE500000, 0x00
@@ -84,7 +84,7 @@ ED 0xE6150004, 0x80331050
WAIT 1, 0xFE40009C
-ED 0xE6150354, 0x00000002
+ED 0xFE400354, 0x01AD8002
LIST "SCIF0 - Serial port for earlyprintk"
EB 0xE6053098, 0x11
diff --git a/arch/arm/mach-spear3xx/include/mach/spear320.h b/arch/arm/mach-spear3xx/include/mach/spear320.h
index cacf17a958cd..53677e464d4b 100644
--- a/arch/arm/mach-spear3xx/include/mach/spear320.h
+++ b/arch/arm/mach-spear3xx/include/mach/spear320.h
@@ -62,7 +62,7 @@
#define SPEAR320_SMII1_BASE 0xAB000000
#define SPEAR320_SMII1_SIZE 0x01000000
-#define SPEAR320_SOC_CONFIG_BASE 0xB4000000
+#define SPEAR320_SOC_CONFIG_BASE 0xB3000000
#define SPEAR320_SOC_CONFIG_SIZE 0x00000070
/* Interrupt registers offsets and masks */
#define INT_STS_MASK_REG 0x04
diff --git a/arch/arm/mach-tegra/include/mach/kbc.h b/arch/arm/mach-tegra/include/mach/kbc.h
index 66ad2760c621..04c779832c78 100644
--- a/arch/arm/mach-tegra/include/mach/kbc.h
+++ b/arch/arm/mach-tegra/include/mach/kbc.h
@@ -57,5 +57,6 @@ struct tegra_kbc_platform_data {
const struct matrix_keymap_data *keymap_data;
bool wakeup;
+ bool use_fn_map;
};
#endif
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 170c9bb95866..f2ce38e085d2 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -49,7 +49,13 @@ static inline void cache_wait(void __iomem *reg, unsigned long mask)
static inline void cache_sync(void)
{
void __iomem *base = l2x0_base;
+
+#ifdef CONFIG_ARM_ERRATA_753970
+ /* write to an unmapped register */
+ writel_relaxed(0, base + L2X0_DUMMY_REG);
+#else
writel_relaxed(0, base + L2X0_CACHE_SYNC);
+#endif
cache_wait(base + L2X0_CACHE_SYNC, 1);
}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0c1172b56b4e..8e3356239136 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -264,6 +264,12 @@ __v7_setup:
orreq r10, r10, #1 << 6 @ set bit #6
mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register
#endif
+#ifdef CONFIG_ARM_ERRATA_751472
+ cmp r6, #0x30 @ present prior to r3p0
+ mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register
+ orrlt r10, r10, #1 << 11 @ set bit #11
+ mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register
+#endif
3: mov r10, #0
#ifdef HARVARD_CACHE
diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c
index 459b319a9fad..69ddc9f76c13 100644
--- a/arch/arm/plat-omap/mailbox.c
+++ b/arch/arm/plat-omap/mailbox.c
@@ -32,7 +32,6 @@
#include <plat/mailbox.h>
-static struct workqueue_struct *mboxd;
static struct omap_mbox **mboxes;
static int mbox_configured;
@@ -197,7 +196,7 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox)
/* no more messages in the fifo. clear IRQ source. */
ack_mbox_irq(mbox, IRQ_RX);
nomem:
- queue_work(mboxd, &mbox->rxq->work);
+ schedule_work(&mbox->rxq->work);
}
static irqreturn_t mbox_interrupt(int irq, void *p)
@@ -307,7 +306,7 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
if (!--mbox->use_count) {
free_irq(mbox->irq, mbox);
tasklet_kill(&mbox->txq->tasklet);
- flush_work(&mbox->rxq->work);
+ flush_work_sync(&mbox->rxq->work);
mbox_queue_free(mbox->txq);
mbox_queue_free(mbox->rxq);
}
@@ -322,15 +321,18 @@ static void omap_mbox_fini(struct omap_mbox *mbox)
struct omap_mbox *omap_mbox_get(const char *name, struct notifier_block *nb)
{
- struct omap_mbox *mbox;
- int ret;
+ struct omap_mbox *_mbox, *mbox = NULL;
+ int i, ret;
if (!mboxes)
return ERR_PTR(-EINVAL);
- for (mbox = *mboxes; mbox; mbox++)
- if (!strcmp(mbox->name, name))
+ for (i = 0; (_mbox = mboxes[i]); i++) {
+ if (!strcmp(_mbox->name, name)) {
+ mbox = _mbox;
break;
+ }
+ }
if (!mbox)
return ERR_PTR(-ENOENT);
@@ -406,10 +408,6 @@ static int __init omap_mbox_init(void)
if (err)
return err;
- mboxd = create_workqueue("mboxd");
- if (!mboxd)
- return -ENOMEM;
-
/* kfifo size sanity check: alignment and minimal size */
mbox_kfifo_size = ALIGN(mbox_kfifo_size, sizeof(mbox_msg_t));
mbox_kfifo_size = max_t(unsigned int, mbox_kfifo_size,
@@ -421,7 +419,6 @@ subsys_initcall(omap_mbox_init);
static void __exit omap_mbox_exit(void)
{
- destroy_workqueue(mboxd);
class_unregister(&omap_mbox_class);
}
module_exit(omap_mbox_exit);
diff --git a/arch/arm/plat-s5p/dev-uart.c b/arch/arm/plat-s5p/dev-uart.c
index 6a7342886171..afaf87fdb93e 100644
--- a/arch/arm/plat-s5p/dev-uart.c
+++ b/arch/arm/plat-s5p/dev-uart.c
@@ -28,7 +28,7 @@
static struct resource s5p_uart0_resource[] = {
[0] = {
.start = S5P_PA_UART0,
- .end = S5P_PA_UART0 + S5P_SZ_UART,
+ .end = S5P_PA_UART0 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -51,7 +51,7 @@ static struct resource s5p_uart0_resource[] = {
static struct resource s5p_uart1_resource[] = {
[0] = {
.start = S5P_PA_UART1,
- .end = S5P_PA_UART1 + S5P_SZ_UART,
+ .end = S5P_PA_UART1 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -74,7 +74,7 @@ static struct resource s5p_uart1_resource[] = {
static struct resource s5p_uart2_resource[] = {
[0] = {
.start = S5P_PA_UART2,
- .end = S5P_PA_UART2 + S5P_SZ_UART,
+ .end = S5P_PA_UART2 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -98,7 +98,7 @@ static struct resource s5p_uart3_resource[] = {
#if CONFIG_SERIAL_SAMSUNG_UARTS > 3
[0] = {
.start = S5P_PA_UART3,
- .end = S5P_PA_UART3 + S5P_SZ_UART,
+ .end = S5P_PA_UART3 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -123,7 +123,7 @@ static struct resource s5p_uart4_resource[] = {
#if CONFIG_SERIAL_SAMSUNG_UARTS > 4
[0] = {
.start = S5P_PA_UART4,
- .end = S5P_PA_UART4 + S5P_SZ_UART,
+ .end = S5P_PA_UART4 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
@@ -148,7 +148,7 @@ static struct resource s5p_uart5_resource[] = {
#if CONFIG_SERIAL_SAMSUNG_UARTS > 5
[0] = {
.start = S5P_PA_UART5,
- .end = S5P_PA_UART5 + S5P_SZ_UART,
+ .end = S5P_PA_UART5 + S5P_SZ_UART - 1,
.flags = IORESOURCE_MEM,
},
[1] = {
diff --git a/arch/arm/plat-samsung/dev-ts.c b/arch/arm/plat-samsung/dev-ts.c
index 236ef8427d7d..3e4bd8147bf4 100644
--- a/arch/arm/plat-samsung/dev-ts.c
+++ b/arch/arm/plat-samsung/dev-ts.c
@@ -58,4 +58,3 @@ void __init s3c24xx_ts_set_platdata(struct s3c2410_ts_mach_info *pd)
s3c_device_ts.dev.platform_data = npd;
}
-EXPORT_SYMBOL(s3c24xx_ts_set_platdata);
diff --git a/arch/arm/plat-samsung/dev-uart.c b/arch/arm/plat-samsung/dev-uart.c
index 3776cd952450..5928105490fa 100644
--- a/arch/arm/plat-samsung/dev-uart.c
+++ b/arch/arm/plat-samsung/dev-uart.c
@@ -15,6 +15,8 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <plat/devs.h>
+
/* uart devices */
static struct platform_device s3c24xx_uart_device0 = {
diff --git a/arch/arm/plat-spear/include/plat/uncompress.h b/arch/arm/plat-spear/include/plat/uncompress.h
index 99ba6789cc97..6dd455bafdfd 100644
--- a/arch/arm/plat-spear/include/plat/uncompress.h
+++ b/arch/arm/plat-spear/include/plat/uncompress.h
@@ -24,10 +24,10 @@ static inline void putc(int c)
{
void __iomem *base = (void __iomem *)SPEAR_DBG_UART_BASE;
- while (readl(base + UART01x_FR) & UART01x_FR_TXFF)
+ while (readl_relaxed(base + UART01x_FR) & UART01x_FR_TXFF)
barrier();
- writel(c, base + UART01x_DR);
+ writel_relaxed(c, base + UART01x_DR);
}
static inline void flush(void)
diff --git a/arch/arm/plat-spear/include/plat/vmalloc.h b/arch/arm/plat-spear/include/plat/vmalloc.h
index 09e9372aea21..8c8b24d07046 100644
--- a/arch/arm/plat-spear/include/plat/vmalloc.h
+++ b/arch/arm/plat-spear/include/plat/vmalloc.h
@@ -14,6 +14,6 @@
#ifndef __PLAT_VMALLOC_H
#define __PLAT_VMALLOC_H
-#define VMALLOC_END 0xF0000000
+#define VMALLOC_END 0xF0000000UL
#endif /* __PLAT_VMALLOC_H */
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029f..8d73724c0092 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#ifdef CONFIG_CORE_TIMER_IRQ_L1
__attribute__((l1_text))
#endif
irqreturn_t timer_interrupt(int irq, void *dummy)
{
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifdef CONFIG_IPIPE
update_root_process_times(get_irq_regs());
diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index 4122678529c0..c40d07f708e8 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -136,7 +136,7 @@ SECTIONS
. = ALIGN(16);
INIT_DATA_SECTION(16)
- PERCPU(4)
+ PERCPU(32, 4)
.exit.data :
{
diff --git a/arch/blackfin/lib/outs.S b/arch/blackfin/lib/outs.S
index 250f4d4b9436..06a5e674401f 100644
--- a/arch/blackfin/lib/outs.S
+++ b/arch/blackfin/lib/outs.S
@@ -13,6 +13,8 @@
.align 2
ENTRY(_outsl)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
@@ -20,10 +22,12 @@ ENTRY(_outsl)
LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
.Llong_loop_s: R0 = [P1++];
.Llong_loop_e: [P0] = R0;
- RTS;
+1: RTS;
ENDPROC(_outsl)
ENTRY(_outsw)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
@@ -31,10 +35,12 @@ ENTRY(_outsw)
LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
.Lword_loop_s: R0 = W[P1++];
.Lword_loop_e: W[P0] = R0;
- RTS;
+1: RTS;
ENDPROC(_outsw)
ENTRY(_outsb)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
@@ -42,10 +48,12 @@ ENTRY(_outsb)
LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
.Lbyte_loop_s: R0 = B[P1++];
.Lbyte_loop_e: B[P0] = R0;
- RTS;
+1: RTS;
ENDPROC(_outsb)
ENTRY(_outsw_8)
+ CC = R2 == 0;
+ IF CC JUMP 1f;
P0 = R0; /* P0 = port */
P1 = R1; /* P1 = address */
P2 = R2; /* P2 = count */
@@ -56,5 +64,5 @@ ENTRY(_outsw_8)
R0 = R0 << 8;
R0 = R0 + R1;
.Lword8_loop_e: W[P0] = R0;
- RTS;
+1: RTS;
ENDPROC(_outsw_8)
diff --git a/arch/blackfin/mach-common/cache.S b/arch/blackfin/mach-common/cache.S
index 790c767ca95a..ab4a925a443e 100644
--- a/arch/blackfin/mach-common/cache.S
+++ b/arch/blackfin/mach-common/cache.S
@@ -58,6 +58,8 @@
1:
.ifeqs "\flushins", BROK_FLUSH_INST
\flushins [P0++];
+ nop;
+ nop;
2: nop;
.else
2: \flushins [P0++];
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8debf..20c85b5dc7d0 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
/* call the real timer interrupt handler */
- do_timer(1);
+ xtime_update(1);
cris_do_profile(regs); /* Save profiling information */
return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 84fed3b4b079..4c9e3e1ba5d1 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -26,7 +26,9 @@
#define FLUSH_ALL (void*)0xffffffff
/* Vector of locks used for various atomic operations */
-spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
+spinlock_t cris_atomic_locks[] = {
+ [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
+};
/* CPU masks */
cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999d..bb978ede8985 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick.
+ * as well as call the "xtime_update()" routine every clocktick.
*/
extern void cris_do_profile(struct pt_regs *regs);
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
/* Call the real timer interrupt handler */
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
return IRQ_HANDLED;
}
diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S
index 442218980db0..728bbd9e7d4c 100644
--- a/arch/cris/kernel/vmlinux.lds.S
+++ b/arch/cris/kernel/vmlinux.lds.S
@@ -72,11 +72,6 @@ SECTIONS
INIT_TEXT_SECTION(PAGE_SIZE)
.init.data : { INIT_DATA }
.init.setup : { INIT_SETUP(16) }
-#ifdef CONFIG_ETRAX_ARCH_V32
- __start___param = .;
- __param : { *(__param) }
- __stop___param = .;
-#endif
.initcall.init : {
INIT_CALLS
}
@@ -107,7 +102,7 @@ SECTIONS
#endif
__vmlinux_end = .; /* Last address of the physical file. */
#ifdef CONFIG_ETRAX_ARCH_V32
- PERCPU(PAGE_SIZE)
+ PERCPU(32, PAGE_SIZE)
.init.ramfs : {
INIT_RAM_FS
diff --git a/arch/frv/include/asm/futex.h b/arch/frv/include/asm/futex.h
index 08b3d1da3583..4bea27f50a7a 100644
--- a/arch/frv/include/asm/futex.h
+++ b/arch/frv/include/asm/futex.h
@@ -7,10 +7,11 @@
#include <asm/errno.h>
#include <asm/uaccess.h>
-extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr);
+extern int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr);
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
return -ENOSYS;
}
diff --git a/arch/frv/kernel/futex.c b/arch/frv/kernel/futex.c
index 14f64b054c7e..d155ca9e5098 100644
--- a/arch/frv/kernel/futex.c
+++ b/arch/frv/kernel/futex.c
@@ -18,7 +18,7 @@
* the various futex operations; MMU fault checking is ignored under no-MMU
* conditions
*/
-static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -50,7 +50,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -83,7 +83,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -116,7 +116,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr, int *_ol
return ret;
}
-static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -149,7 +149,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr, int *_o
return ret;
}
-static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_oldval)
+static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr, int *_oldval)
{
int oldval, ret;
@@ -186,7 +186,7 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr, int *_o
/*
* do the futex operations
*/
-int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -197,7 +197,7 @@ int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb2..b457de496b70 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
profile_tick(CPU_PROFILING);
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don't need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
+ xtime_update(1);
#ifdef CONFIG_HEARTBEAT
static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
__set_LEDS(n);
#endif /* CONFIG_HEARTBEAT */
- write_sequnlock(&xtime_lock);
-
update_process_times(user_mode(get_irq_regs()));
return IRQ_HANDLED;
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S
index 8b973f3cc90e..0daae8af5787 100644
--- a/arch/frv/kernel/vmlinux.lds.S
+++ b/arch/frv/kernel/vmlinux.lds.S
@@ -37,7 +37,7 @@ SECTIONS
_einittext = .;
INIT_DATA_SECTION(8)
- PERCPU(4096)
+ PERCPU(L1_CACHE_BYTES, 4096)
. = ALIGN(PAGE_SIZE);
__init_end = .;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9df..32263a138aa6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
{
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
}
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa8374..7a1533fad47d 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h
index c7f0f062239c..8428525ddb22 100644
--- a/arch/ia64/include/asm/futex.h
+++ b/arch/ia64/include/asm/futex.h
@@ -46,7 +46,7 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -56,7 +56,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -100,23 +100,26 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
{
- register unsigned long r8 __asm ("r8");
+ register unsigned long r8 __asm ("r8") = 0;
+ unsigned long prev;
__asm__ __volatile__(
" mf;; \n"
" mov ar.ccv=%3;; \n"
"[1:] cmpxchg4.acq %0=[%1],%2,ar.ccv \n"
" .xdata4 \"__ex_table\", 1b-., 2f-. \n"
"[2:]"
- : "=r" (r8)
+ : "=r" (prev)
: "r" (uaddr), "r" (newval),
"rO" ((long) (unsigned) oldval)
: "memory");
+ *uval = prev;
return r8;
}
}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
#include <asm/intrinsics.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void
-init_rwsem (struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
/*
* lock for reading
*/
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h
index 96fc62366aa4..ed28bcd5bb85 100644
--- a/arch/ia64/include/asm/xen/hypercall.h
+++ b/arch/ia64/include/asm/xen/hypercall.h
@@ -107,7 +107,7 @@ extern unsigned long __hypercall(unsigned long a1, unsigned long a2,
static inline int
xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..156ad803d5b7 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ if (smp_processor_id() == time_keeper_id)
+ xtime_update(1);
+
+ local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
+ * xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 5a4d044dcb1c..787de4a77d82 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -198,7 +198,7 @@ SECTIONS {
/* Per-cpu data: */
. = ALIGN(PERCPU_PAGE_SIZE);
- PERCPU_VADDR(PERCPU_ADDR, :percpu)
+ PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
__phys_per_cpu_start = __per_cpu_load;
/*
* ensure percpu data fits
diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c
index fd66b048c6fa..419c8620945a 100644
--- a/arch/ia64/xen/suspend.c
+++ b/arch/ia64/xen/suspend.c
@@ -37,19 +37,14 @@ xen_mm_unpin_all(void)
/* nothing */
}
-void xen_pre_device_suspend(void)
-{
- /* nothing */
-}
-
void
-xen_pre_suspend()
+xen_arch_pre_suspend()
{
/* nothing */
}
void
-xen_post_suspend(int suspend_cancelled)
+xen_arch_post_suspend(int suspend_cancelled)
{
if (suspend_cancelled)
return;
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8d..1f8244a78bee 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
run_posix_cpu_timers(p);
delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
- if (cpu == time_keeper_id) {
- write_seqlock(&xtime_lock);
- do_timer(stolen + blocked);
- local_cpu_data->itm_next = delta_itm + new_itm;
- write_sequnlock(&xtime_lock);
- } else {
- local_cpu_data->itm_next = delta_itm + new_itm;
- }
+ if (cpu == time_keeper_id)
+ xtime_update(stolen + blocked);
+
+ local_cpu_data->itm_next = delta_itm + new_itm;
+
per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
}
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bffd..84dd04048db9 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
#ifndef CONFIG_SMP
profile_tick(CPU_PROFILING);
#endif
- /* XXX FIXME. Uh, the xtime_lock should be held here, no? */
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S
index 7da94eaa082b..c194d64cdbb9 100644
--- a/arch/m32r/kernel/vmlinux.lds.S
+++ b/arch/m32r/kernel/vmlinux.lds.S
@@ -53,7 +53,7 @@ SECTIONS
__init_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE)
INIT_DATA_SECTION(16)
- PERCPU(PAGE_SIZE)
+ PERCPU(32, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e14..1edd95095cb4 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via bvme6000_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via bvme6000_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08ff..18b34ee5db3b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a1..6cb9c3a9b6c9 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme147_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme147_process_int() */
irq_handler_t tick_handler;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c4..0b28e2621653 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme16x_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme16x_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313a..6464ad3ae3e6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
#ifdef CONFIG_SUN3
intersil_clear();
#endif
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
if (!(kstat_cpu(0).irqs[irq] % 20))
sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453c..6623909f70e6 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t arch_timer_interrupt(int irq, void *dummy)
{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
-
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/microblaze/include/asm/futex.h b/arch/microblaze/include/asm/futex.h
index ad3fd61b2fe7..b0526d2716fa 100644
--- a/arch/microblaze/include/asm/futex.h
+++ b/arch/microblaze/include/asm/futex.h
@@ -29,7 +29,7 @@
})
static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -39,7 +39,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -94,31 +94,34 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev, cmp;
+ int ret = 0, cmp;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- __asm__ __volatile__ ("1: lwx %0, %2, r0; \
- cmp %1, %0, %3; \
- beqi %1, 3f; \
- 2: swx %4, %2, r0; \
- addic %1, r0, 0; \
- bnei %1, 1b; \
+ __asm__ __volatile__ ("1: lwx %1, %3, r0; \
+ cmp %2, %1, %4; \
+ beqi %2, 3f; \
+ 2: swx %5, %3, r0; \
+ addic %2, r0, 0; \
+ bnei %2, 1b; \
3: \
.section .fixup,\"ax\"; \
4: brid 3b; \
- addik %0, r0, %5; \
+ addik %0, r0, %6; \
.previous; \
.section __ex_table,\"a\"; \
.word 1b,4b,2b,4b; \
.previous;" \
- : "=&r" (prev), "=&r"(cmp) \
+ : "+r" (ret), "=&r" (prev), "=&r"(cmp) \
: "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT));
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/microblaze/include/asm/pci-bridge.h b/arch/microblaze/include/asm/pci-bridge.h
index 0c68764ab547..10717669e0c2 100644
--- a/arch/microblaze/include/asm/pci-bridge.h
+++ b/arch/microblaze/include/asm/pci-bridge.h
@@ -104,11 +104,22 @@ struct pci_controller {
int global_number; /* PCI domain number */
};
+#ifdef CONFIG_PCI
static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
{
return bus->sysdata;
}
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ struct pci_controller *host;
+
+ if (bus->self)
+ return pci_device_to_OF_node(bus->self);
+ host = pci_bus_to_host(bus);
+ return host ? host->dn : NULL;
+}
+
static inline int isa_vaddr_is_ioport(void __iomem *address)
{
/* No specific ISA handling on ppc32 at this stage, it
@@ -116,6 +127,7 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
*/
return 0;
}
+#endif /* CONFIG_PCI */
/* These are used for config access before all the PCI probing
has been done. */
diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h
index 2e72af078b05..d0890d36ef61 100644
--- a/arch/microblaze/include/asm/prom.h
+++ b/arch/microblaze/include/asm/prom.h
@@ -64,21 +64,6 @@ extern void kdump_move_device_tree(void);
/* CPU OF node matching */
struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index 9ae24f4b882b..47187cc2cf00 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -2,88 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn)
- return of_irq_map_one(dn, 0, out_irq);
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't
- * include the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index e363615d6798..1e01a1253631 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <asm/processor.h>
#include <asm/io.h>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc2..d88983516e26 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -4,6 +4,7 @@ config MIPS
select HAVE_GENERIC_DMA_COHERENT
select HAVE_IDE
select HAVE_OPROFILE
+ select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
select HAVE_ARCH_KGDB
@@ -208,6 +209,7 @@ config MACH_JZ4740
select ARCH_REQUIRE_GPIOLIB
select SYS_HAS_EARLY_PRINTK
select HAVE_PWM
+ select HAVE_CLK
config LASAT
bool "LASAT Networks platforms"
@@ -333,6 +335,8 @@ config PNX8550_STB810
config PMC_MSP
bool "PMC-Sierra MSP chipsets"
depends on EXPERIMENTAL
+ select CEVT_R4K
+ select CSRC_R4K
select DMA_NONCOHERENT
select SWAP_IO_SPACE
select NO_EXCEPT_FILL
diff --git a/arch/mips/alchemy/mtx-1/board_setup.c b/arch/mips/alchemy/mtx-1/board_setup.c
index 6398fa95905c..40b84b991191 100644
--- a/arch/mips/alchemy/mtx-1/board_setup.c
+++ b/arch/mips/alchemy/mtx-1/board_setup.c
@@ -54,8 +54,8 @@ int mtx1_pci_idsel(unsigned int devsel, int assert);
static void mtx1_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void mtx1_power_off(void)
diff --git a/arch/mips/alchemy/mtx-1/platform.c b/arch/mips/alchemy/mtx-1/platform.c
index e30e42add697..956f946218c5 100644
--- a/arch/mips/alchemy/mtx-1/platform.c
+++ b/arch/mips/alchemy/mtx-1/platform.c
@@ -28,6 +28,8 @@
#include <linux/mtd/physmap.h>
#include <mtd/mtd-abi.h>
+#include <asm/mach-au1x00/au1xxx_eth.h>
+
static struct gpio_keys_button mtx1_gpio_button[] = {
{
.gpio = 207,
@@ -140,10 +142,17 @@ static struct __initdata platform_device * mtx1_devs[] = {
&mtx1_mtd,
};
+static struct au1000_eth_platform_data mtx1_au1000_eth0_pdata = {
+ .phy_search_highest_addr = 1,
+ .phy1_search_mac0 = 1,
+};
+
static int __init mtx1_register_devices(void)
{
int rc;
+ au1xxx_override_eth_cfg(0, &mtx1_au1000_eth0_pdata);
+
rc = gpio_request(mtx1_gpio_button[0].gpio,
mtx1_gpio_button[0].desc);
if (rc < 0) {
diff --git a/arch/mips/alchemy/xxs1500/board_setup.c b/arch/mips/alchemy/xxs1500/board_setup.c
index b43c918925d3..80c521e5290d 100644
--- a/arch/mips/alchemy/xxs1500/board_setup.c
+++ b/arch/mips/alchemy/xxs1500/board_setup.c
@@ -36,8 +36,8 @@
static void xxs1500_reset(char *c)
{
- /* Hit BCSR.SYSTEM_CONTROL[SW_RST] */
- au_writel(0x00000000, 0xAE00001C);
+ /* Jump to the reset vector */
+ __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000));
}
static void xxs1500_power_off(void)
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index b9cce90346cf..6ebf1734b411 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -75,7 +75,7 @@
}
static inline int
-futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -85,7 +85,7 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -132,11 +132,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int retval;
+ int ret = 0;
+ u32 val;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
if (cpu_has_llsc && R10000_LLSC_WAR) {
@@ -145,25 +147,25 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .set push \n"
" .set noat \n"
" .set mips3 \n"
- "1: ll %0, %2 \n"
- " bne %0, %z3, 3f \n"
+ "1: ll %1, %3 \n"
+ " bne %1, %z4, 3f \n"
" .set mips0 \n"
- " move $1, %z4 \n"
+ " move $1, %z5 \n"
" .set mips3 \n"
- "2: sc $1, %1 \n"
+ "2: sc $1, %2 \n"
" beqzl $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
- "4: li %0, %5 \n"
+ "4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "=&r" (retval), "=R" (*uaddr)
+ : "+r" (ret), "=&r" (val), "=R" (*uaddr)
: "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
: "memory");
} else if (cpu_has_llsc) {
@@ -172,31 +174,32 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
" .set push \n"
" .set noat \n"
" .set mips3 \n"
- "1: ll %0, %2 \n"
- " bne %0, %z3, 3f \n"
+ "1: ll %1, %3 \n"
+ " bne %1, %z4, 3f \n"
" .set mips0 \n"
- " move $1, %z4 \n"
+ " move $1, %z5 \n"
" .set mips3 \n"
- "2: sc $1, %1 \n"
+ "2: sc $1, %2 \n"
" beqz $1, 1b \n"
__WEAK_LLSC_MB
"3: \n"
" .set pop \n"
" .section .fixup,\"ax\" \n"
- "4: li %0, %5 \n"
+ "4: li %0, %6 \n"
" j 3b \n"
" .previous \n"
" .section __ex_table,\"a\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "=&r" (retval), "=R" (*uaddr)
+ : "+r" (ret), "=&r" (val), "=R" (*uaddr)
: "R" (*uaddr), "Jr" (oldval), "Jr" (newval), "i" (-EFAULT)
: "memory");
} else
return -ENOSYS;
- return retval;
+ *uval = val;
+ return ret;
}
#endif
diff --git a/arch/mips/include/asm/perf_event.h b/arch/mips/include/asm/perf_event.h
index e00007cf8162..d0c77496c728 100644
--- a/arch/mips/include/asm/perf_event.h
+++ b/arch/mips/include/asm/perf_event.h
@@ -11,15 +11,5 @@
#ifndef __MIPS_PERF_EVENT_H__
#define __MIPS_PERF_EVENT_H__
-
-/*
- * MIPS performance counters do not raise NMI upon overflow, a regular
- * interrupt will be signaled. Hence we can do the pending perf event
- * work at the tail of the irq handler.
- */
-static inline void
-set_perf_event_pending(void)
-{
-}
-
+/* Leave it empty here. The file is required by linux/perf_event.h */
#endif /* __MIPS_PERF_EVENT_H__ */
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 5a84a1f11231..94ca2b018af7 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -17,29 +17,13 @@
#include <asm/cacheflush.h>
#include <asm/uasm.h>
-/*
- * If the Instruction Pointer is in module space (0xc0000000), return true;
- * otherwise, it is in kernel space (0x80000000), return false.
- *
- * FIXME: This will not work when the kernel space and module space are the
- * same. If they are the same, we need to modify scripts/recordmcount.pl,
- * ftrace_make_nop/call() and the other related parts to ensure the
- * enabling/disabling of the calling site to _mcount is right for both kernel
- * and module.
- */
-
-static inline int in_module(unsigned long ip)
-{
- return ip & 0x40000000;
-}
+#include <asm-generic/sections.h>
#ifdef CONFIG_DYNAMIC_FTRACE
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
#define ADDR_MASK 0x03ffffff /* op_code|addr : 31...26|25 ....0 */
-#define INSN_B_1F_4 0x10000004 /* b 1f; offset = 4 */
-#define INSN_B_1F_5 0x10000005 /* b 1f; offset = 5 */
#define INSN_NOP 0x00000000 /* nop */
#define INSN_JAL(addr) \
((unsigned int)(JAL | (((addr) >> 2) & ADDR_MASK)))
@@ -69,6 +53,20 @@ static inline void ftrace_dyn_arch_init_insns(void)
#endif
}
+/*
+ * Check if the address is in kernel space
+ *
+ * Cloned from core_kernel_text() in kernel/extable.c, but without the
+ * init_kernel_text() check, as Ftrace doesn't trace init section functions.
+ */
+static inline int in_kernel_space(unsigned long ip)
+{
+ if (ip >= (unsigned long)_stext &&
+ ip <= (unsigned long)_etext)
+ return 1;
+ return 0;
+}
+
static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
{
int faulted;
@@ -84,6 +82,42 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
return 0;
}
+/*
+ * The details about the calling site of mcount on MIPS
+ *
+ * 1. For kernel:
+ *
+ * move at, ra
+ * jal _mcount --> nop
+ *
+ * 2. For modules:
+ *
+ * 2.1 For KBUILD_MCOUNT_RA_ADDRESS and CONFIG_32BIT
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * move $12, ra_address
+ * jalr v1
+ * sub sp, sp, 8
+ * 1: offset = 5 instructions
+ * 2.2 For the Other situations
+ *
+ * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
+ * addiu v1, v1, low_16bit_of_mcount
+ * move at, ra
+ * jalr v1
+ * nop | move $12, ra_address | sub sp, sp, 8
+ * 1: offset = 4 instructions
+ */
+
+#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
+#define MCOUNT_OFFSET_INSNS 5
+#else
+#define MCOUNT_OFFSET_INSNS 4
+#endif
+#define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS)
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -91,39 +125,11 @@ int ftrace_make_nop(struct module *mod,
unsigned long ip = rec->ip;
/*
- * We have compiled module with -mlong-calls, but compiled the kernel
- * without it, we need to cope with them respectively.
+ * If ip is in kernel space, no long call is needed; otherwise, a long
+ * call is needed.
*/
- if (in_module(ip)) {
-#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT)
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000005)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * move $12, ra_address
- * jalr v1
- * sub sp, sp, 8
- * 1: offset = 5 instructions
- */
- new = INSN_B_1F_5;
-#else
- /*
- * lui v1, hi_16bit_of_mcount --> b 1f (0x10000004)
- * addiu v1, v1, low_16bit_of_mcount
- * move at, ra
- * jalr v1
- * nop | move $12, ra_address | sub sp, sp, 8
- * 1: offset = 4 instructions
- */
- new = INSN_B_1F_4;
-#endif
- } else {
- /*
- * move at, ra
- * jal _mcount --> nop
- */
- new = INSN_NOP;
- }
+ new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
+
return ftrace_modify_code(ip, new);
}
@@ -132,8 +138,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
unsigned int new;
unsigned long ip = rec->ip;
- /* ip, module: 0xc0000000, kernel: 0x80000000 */
- new = in_module(ip) ? insn_lui_v1_hi16_mcount : insn_jal_ftrace_caller;
+ new = in_kernel_space(ip) ? insn_jal_ftrace_caller :
+ insn_lui_v1_hi16_mcount;
return ftrace_modify_code(ip, new);
}
@@ -190,29 +196,25 @@ int ftrace_disable_ftrace_graph_caller(void)
#define S_R_SP (0xafb0 << 16) /* s{d,w} R, offset(sp) */
#define OFFSET_MASK 0xffff /* stack offset range: 0 ~ PT_SIZE */
-unsigned long ftrace_get_parent_addr(unsigned long self_addr,
- unsigned long parent,
- unsigned long parent_addr,
- unsigned long fp)
+unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
+ old_parent_ra, unsigned long parent_ra_addr, unsigned long fp)
{
- unsigned long sp, ip, ra;
+ unsigned long sp, ip, tmp;
unsigned int code;
int faulted;
/*
- * For module, move the ip from calling site of mcount to the
- * instruction "lui v1, hi_16bit_of_mcount"(offset is 20), but for
- * kernel, move to the instruction "move ra, at"(offset is 12)
+ * For module, step back from the return address to the instruction just
+ * before "lui v1, hi_16bit_of_mcount" (offset is 24), but for kernel,
+ * step back to the one just before "move at, ra" (offset is 16)
*/
- ip = self_addr - (in_module(self_addr) ? 20 : 12);
+ ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
/*
* search the text until finding the non-store instruction or "s{d,w}
* ra, offset(sp)" instruction
*/
do {
- ip -= 4;
-
/* get the code at "ip": code = *(unsigned int *)ip; */
safe_load_code(code, ip, faulted);
@@ -224,18 +226,20 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* store the ra on the stack
*/
if ((code & S_R_SP) != S_R_SP)
- return parent_addr;
+ return parent_ra_addr;
- } while (((code & S_RA_SP) != S_RA_SP));
+ /* Step back to the preceding instruction */
+ ip -= 4;
+ } while ((code & S_RA_SP) != S_RA_SP);
sp = fp + (code & OFFSET_MASK);
- /* ra = *(unsigned long *)sp; */
- safe_load_stack(ra, sp, faulted);
+ /* tmp = *(unsigned long *)sp; */
+ safe_load_stack(tmp, sp, faulted);
if (unlikely(faulted))
return 0;
- if (ra == parent)
+ if (tmp == old_parent_ra)
return sp;
return 0;
}
@@ -246,21 +250,21 @@ unsigned long ftrace_get_parent_addr(unsigned long self_addr,
* Hook the return address and push it in the stack of return addrs
* in current thread info.
*/
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
unsigned long fp)
{
- unsigned long old;
+ unsigned long old_parent_ra;
struct ftrace_graph_ent trace;
unsigned long return_hooker = (unsigned long)
&return_to_handler;
- int faulted;
+ int faulted, insns;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
return;
/*
- * "parent" is the stack address saved the return address of the caller
- * of _mcount.
+ * "parent_ra_addr" is the stack address where the return address of
+ * the caller of _mcount is saved.
*
* if the gcc < 4.5, a leaf function does not save the return address
* in the stack address, so, we "emulate" one in _mcount's stack space,
@@ -275,37 +279,44 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
* do it in ftrace_graph_caller of mcount.S.
*/
- /* old = *parent; */
- safe_load_stack(old, parent, faulted);
+ /* old_parent_ra = *parent_ra_addr; */
+ safe_load_stack(old_parent_ra, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
#ifndef KBUILD_MCOUNT_RA_ADDRESS
- parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
- (unsigned long)parent, fp);
+ parent_ra_addr = (unsigned long *)ftrace_get_parent_ra_addr(self_ra,
+ old_parent_ra, (unsigned long)parent_ra_addr, fp);
/*
* If fails when getting the stack address of the non-leaf function's
* ra, stop function graph tracer and return
*/
- if (parent == 0)
+ if (parent_ra_addr == 0)
goto out;
#endif
- /* *parent = return_hooker; */
- safe_store_stack(return_hooker, parent, faulted);
+ /* *parent_ra_addr = return_hooker; */
+ safe_store_stack(return_hooker, parent_ra_addr, faulted);
if (unlikely(faulted))
goto out;
- if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
- -EBUSY) {
- *parent = old;
+ if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
+ == -EBUSY) {
+ *parent_ra_addr = old_parent_ra;
return;
}
- trace.func = self_addr;
+ /*
+ * Get the recorded ip of the current mcount calling site in the
+ * __mcount_loc section, which will be used to filter the function
+ * entries configured through the tracing/set_graph_function interface.
+ */
+
+ insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
+ trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
current->curr_ret_stack--;
- *parent = old;
+ *parent_ra_addr = old_parent_ra;
}
return;
out:
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index 2b7f3f703b83..a8244854d3dc 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -161,41 +161,6 @@ mipspmu_event_set_period(struct perf_event *event,
return ret;
}
-static int mipspmu_enable(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- int idx;
- int err = 0;
-
- /* To look for a free counter for this event. */
- idx = mipspmu->alloc_counter(cpuc, hwc);
- if (idx < 0) {
- err = idx;
- goto out;
- }
-
- /*
- * If there is an event in the counter we are going to use then
- * make sure it is disabled.
- */
- event->hw.idx = idx;
- mipspmu->disable_event(idx);
- cpuc->events[idx] = event;
-
- /* Set the period for the event. */
- mipspmu_event_set_period(event, hwc, idx);
-
- /* Enable the event. */
- mipspmu->enable_event(hwc, idx);
-
- /* Propagate our changes to the userspace mapping. */
- perf_event_update_userpage(event);
-
-out:
- return err;
-}
-
static void mipspmu_event_update(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
@@ -204,7 +169,7 @@ static void mipspmu_event_update(struct perf_event *event,
unsigned long flags;
int shift = 64 - TOTAL_BITS;
s64 prev_raw_count, new_raw_count;
- s64 delta;
+ u64 delta;
again:
prev_raw_count = local64_read(&hwc->prev_count);
@@ -231,32 +196,90 @@ again:
return;
}
-static void mipspmu_disable(struct perf_event *event)
+static void mipspmu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+
+ /* Set the period for the event. */
+ mipspmu_event_set_period(event, hwc, hwc->idx);
+
+ /* Enable the event. */
+ mipspmu->enable_event(hwc, hwc->idx);
+}
+
+static void mipspmu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!mipspmu)
+ return;
+
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ /* We are working on a local event. */
+ mipspmu->disable_event(hwc->idx);
+ barrier();
+ mipspmu_event_update(event, hwc, hwc->idx);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static int mipspmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- int idx = hwc->idx;
+ int idx;
+ int err = 0;
+ perf_pmu_disable(event->pmu);
- WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+ /* To look for a free counter for this event. */
+ idx = mipspmu->alloc_counter(cpuc, hwc);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
- /* We are working on a local event. */
+ /*
+ * If there is an event in the counter we are going to use then
+ * make sure it is disabled.
+ */
+ event->hw.idx = idx;
mipspmu->disable_event(idx);
+ cpuc->events[idx] = event;
- barrier();
-
- mipspmu_event_update(event, hwc, idx);
- cpuc->events[idx] = NULL;
- clear_bit(idx, cpuc->used_mask);
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ if (flags & PERF_EF_START)
+ mipspmu_start(event, PERF_EF_RELOAD);
+ /* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
+
+out:
+ perf_pmu_enable(event->pmu);
+ return err;
}
-static void mipspmu_unthrottle(struct perf_event *event)
+static void mipspmu_del(struct perf_event *event, int flags)
{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
- mipspmu->enable_event(hwc, hwc->idx);
+ WARN_ON(idx < 0 || idx >= mipspmu->num_counters);
+
+ mipspmu_stop(event, PERF_EF_UPDATE);
+ cpuc->events[idx] = NULL;
+ clear_bit(idx, cpuc->used_mask);
+
+ perf_event_update_userpage(event);
}
static void mipspmu_read(struct perf_event *event)
@@ -270,12 +293,17 @@ static void mipspmu_read(struct perf_event *event)
mipspmu_event_update(event, hwc, hwc->idx);
}
-static struct pmu pmu = {
- .enable = mipspmu_enable,
- .disable = mipspmu_disable,
- .unthrottle = mipspmu_unthrottle,
- .read = mipspmu_read,
-};
+static void mipspmu_enable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->start();
+}
+
+static void mipspmu_disable(struct pmu *pmu)
+{
+ if (mipspmu)
+ mipspmu->stop();
+}
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);
@@ -318,6 +346,82 @@ static void mipspmu_free_irq(void)
perf_irq = save_perf_irq;
}
+/*
+ * mipsxx/rm9000/loongson2 have different performance counters, they have
+ * specific low-level init routines.
+ */
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+ if (atomic_dec_and_mutex_lock(&active_events,
+ &pmu_reserve_mutex)) {
+ /*
+ * We must not call the destroy function with interrupts
+ * disabled.
+ */
+ on_each_cpu(reset_counters,
+ (void *)(long)mipspmu->num_counters, 1);
+ mipspmu_free_irq();
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+}
+
+static int mipspmu_event_init(struct perf_event *event)
+{
+ int err = 0;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ if (!mipspmu || event->cpu >= nr_cpumask_bits ||
+ (event->cpu >= 0 && !cpu_online(event->cpu)))
+ return -ENODEV;
+
+ if (!atomic_inc_not_zero(&active_events)) {
+ if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
+ atomic_dec(&active_events);
+ return -ENOSPC;
+ }
+
+ mutex_lock(&pmu_reserve_mutex);
+ if (atomic_read(&active_events) == 0)
+ err = mipspmu_get_irq();
+
+ if (!err)
+ atomic_inc(&active_events);
+ mutex_unlock(&pmu_reserve_mutex);
+ }
+
+ if (err)
+ return err;
+
+ err = __hw_perf_event_init(event);
+ if (err)
+ hw_perf_event_destroy(event);
+
+ return err;
+}
+
+static struct pmu pmu = {
+ .pmu_enable = mipspmu_enable,
+ .pmu_disable = mipspmu_disable,
+ .event_init = mipspmu_event_init,
+ .add = mipspmu_add,
+ .del = mipspmu_del,
+ .start = mipspmu_start,
+ .stop = mipspmu_stop,
+ .read = mipspmu_read,
+};
+
static inline unsigned int
mipspmu_perf_event_encode(const struct mips_perf_event *pev)
{
@@ -382,8 +486,9 @@ static int validate_event(struct cpu_hw_events *cpuc,
{
struct hw_perf_event fake_hwc = event->hw;
- if (event->pmu && event->pmu != &pmu)
- return 0;
+ /* Allow mixed event group. So return 1 to pass validation. */
+ if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
+ return 1;
return mipspmu->alloc_counter(cpuc, &fake_hwc) >= 0;
}
@@ -409,73 +514,6 @@ static int validate_group(struct perf_event *event)
return 0;
}
-/*
- * mipsxx/rm9000/loongson2 have different performance counters, they have
- * specific low-level init routines.
- */
-static void reset_counters(void *arg);
-static int __hw_perf_event_init(struct perf_event *event);
-
-static void hw_perf_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_and_mutex_lock(&active_events,
- &pmu_reserve_mutex)) {
- /*
- * We must not call the destroy function with interrupts
- * disabled.
- */
- on_each_cpu(reset_counters,
- (void *)(long)mipspmu->num_counters, 1);
- mipspmu_free_irq();
- mutex_unlock(&pmu_reserve_mutex);
- }
-}
-
-const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
- int err = 0;
-
- if (!mipspmu || event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
- return ERR_PTR(-ENODEV);
-
- if (!atomic_inc_not_zero(&active_events)) {
- if (atomic_read(&active_events) > MIPS_MAX_HWEVENTS) {
- atomic_dec(&active_events);
- return ERR_PTR(-ENOSPC);
- }
-
- mutex_lock(&pmu_reserve_mutex);
- if (atomic_read(&active_events) == 0)
- err = mipspmu_get_irq();
-
- if (!err)
- atomic_inc(&active_events);
- mutex_unlock(&pmu_reserve_mutex);
- }
-
- if (err)
- return ERR_PTR(err);
-
- err = __hw_perf_event_init(event);
- if (err)
- hw_perf_event_destroy(event);
-
- return err ? ERR_PTR(err) : &pmu;
-}
-
-void hw_perf_enable(void)
-{
- if (mipspmu)
- mipspmu->start();
-}
-
-void hw_perf_disable(void)
-{
- if (mipspmu)
- mipspmu->stop();
-}
-
/* This is needed by specific irq handlers in perf_event_*.c */
static void
handle_associated_event(struct cpu_hw_events *cpuc,
@@ -496,21 +534,13 @@ handle_associated_event(struct cpu_hw_events *cpuc,
#include "perf_event_mipsxx.c"
/* Callchain handling code. */
-static inline void
-callchain_store(struct perf_callchain_entry *entry,
- u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
/*
* Leave userspace callchain empty for now. When we find a way to trace
* the user stack callchains, we add here.
*/
-static void
-perf_callchain_user(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
}
@@ -523,23 +553,21 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
while (!kstack_end(sp)) {
addr = *sp++;
if (__kernel_text_address(addr)) {
- callchain_store(entry, addr);
+ perf_callchain_store(entry, addr);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
}
}
}
-static void
-perf_callchain_kernel(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
+void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
{
unsigned long sp = regs->regs[29];
#ifdef CONFIG_KALLSYMS
unsigned long ra = regs->regs[31];
unsigned long pc = regs->cp0_epc;
- callchain_store(entry, PERF_CONTEXT_KERNEL);
if (raw_show_trace || !__kernel_text_address(pc)) {
unsigned long stack_page =
(unsigned long)task_stack_page(current);
@@ -549,53 +577,12 @@ perf_callchain_kernel(struct pt_regs *regs,
return;
}
do {
- callchain_store(entry, pc);
+ perf_callchain_store(entry, pc);
if (entry->nr >= PERF_MAX_STACK_DEPTH)
break;
pc = unwind_stack(current, &sp, pc, &ra);
} while (pc);
#else
- callchain_store(entry, PERF_CONTEXT_KERNEL);
save_raw_perf_callchain(entry, sp);
#endif
}
-
-static void
-perf_do_callchain(struct pt_regs *regs,
- struct perf_callchain_entry *entry)
-{
- int is_user;
-
- if (!regs)
- return;
-
- is_user = user_mode(regs);
-
- if (!current || !current->pid)
- return;
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- if (!is_user) {
- perf_callchain_kernel(regs, entry);
- if (current->mm)
- regs = task_pt_regs(current);
- else
- regs = NULL;
- }
- if (regs)
- perf_callchain_user(regs, entry);
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-
-struct perf_callchain_entry *
-perf_callchain(struct pt_regs *regs)
-{
- struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);
-
- entry->nr = 0;
- perf_do_callchain(regs, entry);
- return entry;
-}
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 183e0d226669..d9a7db78ed62 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -696,7 +696,7 @@ static int mipsxx_pmu_handle_shared_irq(void)
* interrupt, not NMI.
*/
if (handled == IRQ_HANDLED)
- perf_event_do_pending();
+ irq_work_run();
#ifdef CONFIG_MIPS_MT_SMP
read_unlock(&pmuint_rwlock);
@@ -1045,6 +1045,8 @@ init_hw_perf_events(void)
"CPU, irq %d%s\n", mipspmu->name, counters, irq,
irq < 0 ? " (share with timer interrupt)" : "");
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
return 0;
}
early_initcall(init_hw_perf_events);
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 5922342bca39..dbbe0ce48d89 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -84,7 +84,7 @@ static int protected_save_fp_context(struct sigcontext __user *sc)
static int protected_restore_fp_context(struct sigcontext __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index a0ed0e052b2e..aae986613795 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -115,7 +115,7 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc)
static int protected_restore_fp_context32(struct sigcontext32 __user *sc)
{
- int err, tmp;
+ int err, tmp __maybe_unused;
while (1) {
lock_fpu_owner();
own_fpu_inatomic(0);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb49..32a256101082 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -193,6 +193,22 @@ void __devinit smp_prepare_boot_cpu(void)
*/
static struct task_struct *cpu_idle_thread[NR_CPUS];
+struct create_idle {
+ struct work_struct work;
+ struct task_struct *idle;
+ struct completion done;
+ int cpu;
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
+{
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
+
+ c_idle->idle = fork_idle(c_idle->cpu);
+ complete(&c_idle->done);
+}
+
int __cpuinit __cpu_up(unsigned int cpu)
{
struct task_struct *idle;
@@ -203,8 +219,19 @@ int __cpuinit __cpu_up(unsigned int cpu)
* Linux can schedule processes on this slave.
*/
if (!cpu_idle_thread[cpu]) {
- idle = fork_idle(cpu);
- cpu_idle_thread[cpu] = idle;
+ /*
+ * Schedule work item to avoid forking user task
+ * Ported from arch/x86/kernel/smpboot.c
+ */
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ };
+
+ INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
+ idle = cpu_idle_thread[cpu] = c_idle.idle;
if (IS_ERR(idle))
panic(KERN_ERR "Fork failed for CPU %d", cpu);
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e0..58beabf50b3c 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -383,12 +383,11 @@ save_static_function(sys_sysmips);
static int __used noinline
_sys_sysmips(nabi_no_regargs struct pt_regs regs)
{
- long cmd, arg1, arg2, arg3;
+ long cmd, arg1, arg2;
cmd = regs.regs[4];
arg1 = regs.regs[5];
arg2 = regs.regs[6];
- arg3 = regs.regs[7];
switch (cmd) {
case MIPS_ATOMIC_SET:
@@ -405,7 +404,7 @@ _sys_sysmips(nabi_no_regargs struct pt_regs regs)
if (arg1 & 2)
set_thread_flag(TIF_LOGADE);
else
- clear_thread_flag(TIF_FIXADE);
+ clear_thread_flag(TIF_LOGADE);
return 0;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 570607b376b5..832afbb87588 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -115,7 +115,7 @@ SECTIONS
EXIT_DATA
}
- PERCPU(PAGE_SIZE)
+ PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/mips/loongson/Kconfig b/arch/mips/loongson/Kconfig
index 6e1b77fec7ea..aca93eed8779 100644
--- a/arch/mips/loongson/Kconfig
+++ b/arch/mips/loongson/Kconfig
@@ -1,6 +1,7 @@
+if MACH_LOONGSON
+
choice
prompt "Machine Type"
- depends on MACH_LOONGSON
config LEMOTE_FULOONG2E
bool "Lemote Fuloong(2e) mini-PC"
@@ -87,3 +88,5 @@ config LOONGSON_UART_BASE
config LOONGSON_MC146818
bool
default n
+
+endif # MACH_LOONGSON
diff --git a/arch/mips/loongson/common/cmdline.c b/arch/mips/loongson/common/cmdline.c
index 1a06defc4f7f..353e1d2e41a5 100644
--- a/arch/mips/loongson/common/cmdline.c
+++ b/arch/mips/loongson/common/cmdline.c
@@ -44,10 +44,5 @@ void __init prom_init_cmdline(void)
strcat(arcs_cmdline, " ");
}
- if ((strstr(arcs_cmdline, "console=")) == NULL)
- strcat(arcs_cmdline, " console=ttyS0,115200");
- if ((strstr(arcs_cmdline, "root=")) == NULL)
- strcat(arcs_cmdline, " root=/dev/hda1");
-
prom_init_machtype();
}
diff --git a/arch/mips/loongson/common/machtype.c b/arch/mips/loongson/common/machtype.c
index 81fbe6b73f91..2efd5d9dee27 100644
--- a/arch/mips/loongson/common/machtype.c
+++ b/arch/mips/loongson/common/machtype.c
@@ -41,7 +41,7 @@ void __weak __init mach_prom_init_machtype(void)
void __init prom_init_machtype(void)
{
- char *p, str[MACHTYPE_LEN];
+ char *p, str[MACHTYPE_LEN + 1];
int machtype = MACH_LEMOTE_FL2E;
mips_machtype = LOONGSON_MACHTYPE;
@@ -53,6 +53,7 @@ void __init prom_init_machtype(void)
}
p += strlen("machtype=");
strncpy(str, p, MACHTYPE_LEN);
+ str[MACHTYPE_LEN] = '\0';
p = strstr(str, " ");
if (p)
*p = '\0';
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 2701d9500959..2a7d43f4f161 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -70,7 +70,7 @@
#define COMPXSP \
- unsigned xm; int xe; int xs; int xc
+ unsigned xm; int xe; int xs __maybe_unused; int xc
#define COMPYSP \
unsigned ym; int ye; int ys; int yc
@@ -104,7 +104,7 @@
#define COMPXDP \
-u64 xm; int xe; int xs; int xc
+u64 xm; int xe; int xs __maybe_unused; int xc
#define COMPYDP \
u64 ym; int ye; int ys; int yc
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 2efcbd24c82f..279599e9a779 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -324,7 +324,7 @@ int page_is_ram(unsigned long pagenr)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long lastpfn;
+ unsigned long lastpfn __maybe_unused;
pagetable_init();
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 083d3412d0bc..04f9e17db9d0 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -109,6 +109,8 @@ static bool scratchpad_available(void)
static int scratchpad_offset(int i)
{
BUG();
+ /* Really unreachable, but evidently some GCC want this. */
+ return 0;
}
#endif
/*
diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c
index b7c03d80c88c..68798f869c0f 100644
--- a/arch/mips/pci/ops-pmcmsp.c
+++ b/arch/mips/pci/ops-pmcmsp.c
@@ -308,7 +308,7 @@ static struct resource pci_mem_resource = {
* RETURNS: PCIBIOS_SUCCESSFUL - success
*
****************************************************************************/
-static int bpci_interrupt(int irq, void *dev_id)
+static irqreturn_t bpci_interrupt(int irq, void *dev_id)
{
struct msp_pci_regs *preg = (void *)PCI_BASE_REG;
unsigned int stat = preg->if_status;
@@ -326,7 +326,7 @@ static int bpci_interrupt(int irq, void *dev_id)
/* write to clear all asserted interrupts */
preg->if_status = stat;
- return PCIBIOS_SUCCESSFUL;
+ return IRQ_HANDLED;
}
/*****************************************************************************
diff --git a/arch/mips/pmc-sierra/Kconfig b/arch/mips/pmc-sierra/Kconfig
index c139988bb85d..8d798497c614 100644
--- a/arch/mips/pmc-sierra/Kconfig
+++ b/arch/mips/pmc-sierra/Kconfig
@@ -4,15 +4,11 @@ choice
config PMC_MSP4200_EVAL
bool "PMC-Sierra MSP4200 Eval Board"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
config PMC_MSP4200_GW
bool "PMC-Sierra MSP4200 VoIP Gateway"
- select CEVT_R4K
- select CSRC_R4K
select IRQ_MSP_SLP
select HW_HAS_PCI
diff --git a/arch/mips/pmc-sierra/msp71xx/msp_time.c b/arch/mips/pmc-sierra/msp71xx/msp_time.c
index cca64e15f57f..01df84ce31e2 100644
--- a/arch/mips/pmc-sierra/msp71xx/msp_time.c
+++ b/arch/mips/pmc-sierra/msp71xx/msp_time.c
@@ -81,7 +81,7 @@ void __init plat_time_init(void)
mips_hpt_frequency = cpu_rate/2;
}
-unsigned int __init get_c0_compare_int(void)
+unsigned int __cpuinit get_c0_compare_int(void)
{
return MSP_INT_VPE0_TIMER;
}
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 92d2f9298e38..9d773a639513 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -139,7 +139,7 @@ static inline unsigned long __cmpxchg(volatile unsigned long *m,
* Atomically reads the value of @v. Note that the guaranteed
* useful range of an atomic_t is only 24 bits.
*/
-#define atomic_read(v) ((v)->counter)
+#define atomic_read(v) (ACCESS_ONCE((v)->counter))
/**
* atomic_set - set atomic variable
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 679dee0bbd08..3d6e60dad9d9 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -160,9 +160,10 @@ struct __large_struct { unsigned long buf[100]; };
#define __get_user_check(x, ptr, size) \
({ \
+ const __typeof__(ptr) __guc_ptr = (ptr); \
int _e; \
- if (likely(__access_ok((unsigned long) (ptr), (size)))) \
- _e = __get_user_nocheck((x), (ptr), (size)); \
+ if (likely(__access_ok((unsigned long) __guc_ptr, (size)))) \
+ _e = __get_user_nocheck((x), __guc_ptr, (size)); \
else { \
_e = -EFAULT; \
(x) = (__typeof__(x))0; \
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b9..5b955000626d 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
unsigned tsc, elapse;
irqreturn_t ret;
- write_seqlock(&xtime_lock);
-
while (tsc = get_cycles(),
elapse = tsc - mn10300_last_tsc, /* time elapsed since last
* tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
mn10300_last_tsc += MN10300_TSC_PER_HZ;
/* advance the kernel's time tracking system */
- do_timer(1);
+ xtime_update(1);
}
- write_sequnlock(&xtime_lock);
-
ret = local_timer_interrupt();
#ifdef CONFIG_SMP
send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S
index febbeee7f2f5..968bcd2cb022 100644
--- a/arch/mn10300/kernel/vmlinux.lds.S
+++ b/arch/mn10300/kernel/vmlinux.lds.S
@@ -70,7 +70,7 @@ SECTIONS
.exit.text : { EXIT_TEXT; }
.exit.data : { EXIT_DATA; }
- PERCPU(PAGE_SIZE)
+ PERCPU(32, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
diff --git a/arch/mn10300/mm/cache-inv-icache.c b/arch/mn10300/mm/cache-inv-icache.c
index a8933a60b2d4..a6b63dde603d 100644
--- a/arch/mn10300/mm/cache-inv-icache.c
+++ b/arch/mn10300/mm/cache-inv-icache.c
@@ -69,7 +69,7 @@ static void flush_icache_page_range(unsigned long start, unsigned long end)
/* invalidate the icache coverage on that region */
mn10300_local_icache_inv_range2(addr + off, size);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
}
/**
@@ -101,7 +101,7 @@ void flush_icache_range(unsigned long start, unsigned long end)
* directly */
start_page = (start >= 0x80000000UL) ? start : 0x80000000UL;
mn10300_icache_inv_range(start_page, end);
- smp_cache_call(SMP_ICACHE_INV_FLUSH_RANGE, start, end);
+ smp_cache_call(SMP_ICACHE_INV_RANGE, start, end);
if (start_page == start)
goto done;
end = start_page;
diff --git a/arch/parisc/hpux/sys_hpux.c b/arch/parisc/hpux/sys_hpux.c
index 30394081d9b6..6ab9580b0b00 100644
--- a/arch/parisc/hpux/sys_hpux.c
+++ b/arch/parisc/hpux/sys_hpux.c
@@ -185,26 +185,21 @@ struct hpux_statfs {
int16_t f_pad;
};
-static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
+static int do_statfs_hpux(struct kstatfs *st, struct hpux_statfs __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- memset(buf, 0, sizeof(*buf));
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid[0] = st.f_fsid.val[0];
- buf->f_fsid[1] = st.f_fsid.val[1];
-
+ struct hpux_statfs buf;
+ memset(&buf, 0, sizeof(buf));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid[0] = st->f_fsid.val[0];
+ buf.f_fsid[1] = st->f_fsid.val[1];
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
@@ -212,35 +207,19 @@ static int do_statfs_hpux(struct path *path, struct hpux_statfs *buf)
asmlinkage long hpux_statfs(const char __user *pathname,
struct hpux_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct hpux_statfs tmp;
- error = do_statfs_hpux(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
asmlinkage long hpux_fstatfs(unsigned int fd, struct hpux_statfs __user * buf)
{
- struct file *file;
- struct hpux_statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_hpux(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
- out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_hpux(&st, buf);
return error;
}
diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h
index f357fc693c89..0304b92ccfea 100644
--- a/arch/parisc/include/asm/fcntl.h
+++ b/arch/parisc/include/asm/fcntl.h
@@ -19,6 +19,8 @@
#define O_NOFOLLOW 000000200 /* don't follow links */
#define O_INVISIBLE 004000000 /* invisible I/O, for DMAPI/XDSM */
+#define O_PATH 020000000
+
#define F_GETLK64 8
#define F_SETLK64 9
#define F_SETLKW64 10
diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h
index 0c705c3a55ef..67a33cc27ef2 100644
--- a/arch/parisc/include/asm/futex.h
+++ b/arch/parisc/include/asm/futex.h
@@ -8,7 +8,7 @@
#include <asm/errno.h>
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -51,10 +51,10 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
/* Non-atomic version */
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int err = 0;
- int uval;
+ u32 val;
/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
* our gateway page, and causes no end of trouble...
@@ -62,15 +62,15 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
if (segment_eq(KERNEL_DS, get_fs()) && !uaddr)
return -EFAULT;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- err = get_user(uval, uaddr);
- if (err) return -EFAULT;
- if (uval == oldval)
- err = put_user(newval, uaddr);
- if (err) return -EFAULT;
- return uval;
+ if (get_user(val, uaddr))
+ return -EFAULT;
+ if (val == oldval && put_user(newval, uaddr))
+ return -EFAULT;
+ *uval = val;
+ return 0;
}
#endif /*__KERNEL__*/
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d2..45b7389d77aa 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
update_process_times(user_mode(get_irq_regs()));
}
- if (cpu == 0) {
- write_seqlock(&xtime_lock);
- do_timer(ticks_elapsed);
- write_sequnlock(&xtime_lock);
- }
+ if (cpu == 0)
+ xtime_update(ticks_elapsed);
return IRQ_HANDLED;
}
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index d64a6bbec2aa..8f1e4efd143e 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -145,7 +145,7 @@ SECTIONS
EXIT_DATA
}
- PERCPU(PAGE_SIZE)
+ PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* freed after init ends here */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 7c589ef81fb0..c94e4a3fe2ef 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -30,7 +30,7 @@
: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
: "cr0", "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -40,7 +40,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -82,35 +82,38 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- int prev;
+ int ret = 0;
+ u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ (
PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%2 # futex_atomic_cmpxchg_inatomic\n\
- cmpw 0,%0,%3\n\
+"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
+ cmpw 0,%1,%4\n\
bne- 3f\n"
- PPC405_ERR77(0,%2)
-"2: stwcx. %4,0,%2\n\
+ PPC405_ERR77(0,%3)
+"2: stwcx. %5,0,%3\n\
bne- 1b\n"
PPC_ACQUIRE_BARRIER
"3: .section .fixup,\"ax\"\n\
-4: li %0,%5\n\
+4: li %0,%6\n\
b 3b\n\
.previous\n\
.section __ex_table,\"a\"\n\
.align 3\n\
" PPC_LONG "1b,4b,2b,4b\n\
.previous" \
- : "=&r" (prev), "+m" (*uaddr)
+ : "+r" (ret), "=&r" (prev), "+m" (*uaddr)
: "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
: "cc", "memory");
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 380d48bacd16..26b8c807f8f1 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -33,9 +33,25 @@
//
//----------------------------------------------------------------------------
#include <linux/cache.h>
+#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+/*
+ * We only have to have statically allocated lppaca structs on
+ * legacy iSeries, which supports at most 64 cpus.
+ */
+#ifdef CONFIG_PPC_ISERIES
+#if NR_CPUS < 64
+#define NR_LPPACAS NR_CPUS
+#else
+#define NR_LPPACAS 64
+#endif
+#else /* not iSeries */
+#define NR_LPPACAS 1
+#endif
+
+
/* The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
* alignment is sufficient to prevent this */
struct lppaca {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 991d5998d6be..fe56a23e1ff0 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -240,6 +240,12 @@ struct machdep_calls {
* claims to support kexec.
*/
int (*machine_kexec_prepare)(struct kimage *image);
+
+ /* Called to perform the _real_ kexec.
+ * Do NOT allocate memory or fail here. We are past the point of
+ * no return.
+ */
+ void (*machine_kexec)(struct kimage *image);
#endif /* CONFIG_KEXEC */
#ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 51e9e6f90d12..edeb80fdd2c3 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -171,6 +171,16 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
return bus->sysdata;
}
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ struct pci_controller *host;
+
+ if (bus->self)
+ return pci_device_to_OF_node(bus->self);
+ host = pci_bus_to_host(bus);
+ return host ? host->dn : NULL;
+}
+
static inline int isa_vaddr_is_ioport(void __iomem *address)
{
/* No specific ISA handling on ppc32 at this stage, it
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index d72757585595..c189aa5fe1f4 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -70,21 +70,6 @@ static inline int of_node_to_nid(struct device_node *device) { return 0; }
#endif
#define of_node_to_nid of_node_to_nid
-/**
- * of_irq_map_pci - Resolve the interrupt for a PCI device
- * @pdev: the device whose interrupt is to be resolved
- * @out_irq: structure of_irq filled by this function
- *
- * This function resolves the PCI interrupt for a given PCI device. If a
- * device-node exists for a given pci_dev, it will use normal OF tree
- * walking. If not, it will implement standard swizzling and walk up the
- * PCI tree until an device-node is found, at which point it will finish
- * resolving using the OF tree walking.
- */
-struct pci_dev;
-struct of_irq;
-extern int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
-
extern void of_instantiate_rtc(void);
/* These includes are put at the bottom because they may contain things
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
* by Paul Mackerras <paulus@samba.org>.
*/
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
/*
* the semaphore definition
*/
@@ -33,47 +28,6 @@
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-struct rw_semaphore {
- long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, \
- __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
- } while (0)
-
/*
* lock for reading
*/
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return sem->count != 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 49a170af8145..a5f8672eeff3 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -87,7 +87,10 @@ void machine_kexec(struct kimage *image)
save_ftrace_enabled = __ftrace_enabled_save();
- default_machine_kexec(image);
+ if (ppc_md.machine_kexec)
+ ppc_md.machine_kexec(image);
+ else
+ default_machine_kexec(image);
__ftrace_enabled_restore(save_ftrace_enabled);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ebf9846f3c3b..f4adf89d7614 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,20 +27,6 @@ extern unsigned long __toc_start;
#ifdef CONFIG_PPC_BOOK3S
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#ifdef CONFIG_PPC_ISERIES
-#if NR_CPUS < 64
-#define NR_LPPACAS NR_CPUS
-#else
-#define NR_LPPACAS 64
-#endif
-#else /* not iSeries */
-#define NR_LPPACAS 1
-#endif
-
-/*
* The structure which the hypervisor knows about - this structure
* should not cross a page boundary. The vpa_init/register_vpa call
* is now known to fail if the lppaca structure crosses a page
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 10a44e68ef11..eb341be9a4d9 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7a1d5cb76932..8303a6c65ef7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -353,6 +353,7 @@ static void switch_booke_debug_regs(struct thread_struct *new_thread)
prime_debug_regs(new_thread);
}
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
static void set_debug_reg_defaults(struct thread_struct *thread)
{
if (thread->dabr) {
@@ -360,6 +361,7 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
set_dabr(0);
}
}
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
int set_dabr(unsigned long dabr)
@@ -670,11 +672,11 @@ void flush_thread(void)
{
discard_lazy_cpu_state();
-#ifdef CONFIG_HAVE_HW_BREAKPOINTS
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
flush_ptrace_hw_breakpoint(current);
-#else /* CONFIG_HAVE_HW_BREAKPOINTS */
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
set_debug_reg_defaults(&current->thread);
-#endif /* CONFIG_HAVE_HW_BREAKPOINTS */
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
void
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index c2b7a07cc3d3..47187cc2cf00 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -2,95 +2,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
-#include <linux/pci_regs.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/etherdevice.h>
#include <linux/of_address.h>
#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-
-#ifdef CONFIG_PCI
-int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
-{
- struct device_node *dn, *ppnode;
- struct pci_dev *ppdev;
- u32 lspec;
- u32 laddr[3];
- u8 pin;
- int rc;
-
- /* Check if we have a device node, if yes, fallback to standard OF
- * parsing
- */
- dn = pci_device_to_OF_node(pdev);
- if (dn) {
- rc = of_irq_map_one(dn, 0, out_irq);
- if (!rc)
- return rc;
- }
-
- /* Ok, we don't, time to have fun. Let's start by building up an
- * interrupt spec. we assume #interrupt-cells is 1, which is standard
- * for PCI. If you do different, then don't use that routine.
- */
- rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
- if (rc != 0)
- return rc;
- /* No pin, exit */
- if (pin == 0)
- return -ENODEV;
-
- /* Now we walk up the PCI tree */
- lspec = pin;
- for (;;) {
- /* Get the pci_dev of our parent */
- ppdev = pdev->bus->self;
-
- /* Ouch, it's a host bridge... */
- if (ppdev == NULL) {
-#ifdef CONFIG_PPC64
- ppnode = pci_bus_to_OF_node(pdev->bus);
-#else
- struct pci_controller *host;
- host = pci_bus_to_host(pdev->bus);
- ppnode = host ? host->dn : NULL;
-#endif
- /* No node for host bridge ? give up */
- if (ppnode == NULL)
- return -EINVAL;
- } else
- /* We found a P2P bridge, check if it has a node */
- ppnode = pci_device_to_OF_node(ppdev);
-
- /* Ok, we have found a parent with a device-node, hand over to
- * the OF parsing code.
- * We build a unit address from the linux device to be used for
- * resolution. Note that we use the linux bus number which may
- * not match your firmware bus numbering.
- * Fortunately, in most cases, interrupt-map-mask doesn't include
- * the bus number as part of the matching.
- * You should still be careful about that though if you intend
- * to rely on this function (you ship a firmware that doesn't
- * create device nodes for all PCI devices).
- */
- if (ppnode)
- break;
-
- /* We can only get here if we hit a P2P bridge with no node,
- * let's do standard swizzling and try again
- */
- lspec = pci_swizzle_interrupt_pin(pdev, lspec);
- pdev = ppdev;
- }
-
- laddr[0] = (pdev->bus->number << 16)
- | (pdev->devfn << 8);
- laddr[1] = laddr[2] = 0;
- return of_irq_map_raw(ppnode, &lspec, 1, laddr, out_irq);
-}
-EXPORT_SYMBOL_GPL(of_irq_map_pci);
-#endif /* CONFIG_PCI */
void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
unsigned long *busno, unsigned long *phys, unsigned long *size)
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8a0deefac08d..b9150f07d266 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -160,7 +160,7 @@ SECTIONS
INIT_RAM_FS
}
- PERCPU(PAGE_SIZE)
+ PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index fd4812329570..0dc95c0aa3be 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1516,7 +1516,8 @@ int start_topology_update(void)
{
int rc = 0;
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ /* Disabled until races with load balancing are fixed */
+ if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
get_lppaca()->shared_proc) {
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 1ec06576f619..c14d09f614f3 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -38,13 +38,11 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
* needs to be flushed. This function will either perform the flush
* immediately or will batch it up if the current CPU has an active
* batch on it.
- *
- * Must be called from within some kind of spinlock/non-preempt region...
*/
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
unsigned long vsid, vaddr;
unsigned int psize;
int ssize;
@@ -99,6 +97,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
if (!batch->active) {
flush_hash_page(vaddr, rpte, psize, ssize, 0);
+ put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -127,6 +126,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
batch->index = ++i;
if (i >= PPC64_TLB_BATCH_NR)
__flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
}
/*
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index 187a7d32f86a..a3d2ce54ea2e 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -70,7 +70,7 @@ static long do_spu_create(const char __user *pathname, unsigned int flags,
if (!IS_ERR(tmp)) {
struct nameidata nd;
- ret = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ ret = kern_path_parent(tmp, &nd);
if (!ret) {
nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
ret = spufs_create(&nd, flags, mode, neighbor);
diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c
index fdb7384c0c4f..f0491cc28900 100644
--- a/arch/powerpc/platforms/iseries/dt.c
+++ b/arch/powerpc/platforms/iseries/dt.c
@@ -242,8 +242,8 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
pft_size[0] = 0; /* NUMA CEC cookie, 0 for non NUMA */
pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE);
- for (i = 0; i < NR_CPUS; i++) {
- if (lppaca_of(i).dyn_proc_status >= 2)
+ for (i = 0; i < NR_LPPACAS; i++) {
+ if (lppaca[i].dyn_proc_status >= 2)
continue;
snprintf(p, 32 - (p - buf), "@%d", i);
@@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt)
dt_prop_str(dt, "device_type", device_type_cpu);
- index = lppaca_of(i).dyn_hv_phys_proc_index;
+ index = lppaca[i].dyn_hv_phys_proc_index;
d = &xIoHriProcessorVpd[index];
dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024);
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index b0863410517f..2946ae10fbfd 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -680,6 +680,7 @@ void * __init iSeries_early_setup(void)
* on but calling this function multiple times is fine.
*/
identify_cpu(0, mfspr(SPRN_PVR));
+ initialise_paca(&boot_paca, 0);
powerpc_firmware_features |= FW_FEATURE_ISERIES;
powerpc_firmware_features |= FW_FEATURE_LPAR;
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index 0851eb1e919e..2751b3a8a66f 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -133,11 +133,12 @@ unsigned long decompress_kernel(void)
unsigned long output_addr;
unsigned char *output;
- check_ipl_parmblock((void *) 0, (unsigned long) output + SZ__bss_start);
+ output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
+ check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
memset(&_bss, 0, &_ebss - &_bss);
free_mem_ptr = (unsigned long)&_end;
free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
- output = (unsigned char *) ((free_mem_end_ptr + 4095UL) & -4096UL);
+ output = (unsigned char *) output_addr;
#ifdef CONFIG_BLK_DEV_INITRD
/*
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index f42dbabc0d30..48884f89ab92 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -38,6 +38,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
BUG_ON(ret != bsize);
data += bsize - index;
len -= bsize - index;
+ index = 0;
}
/* process as many blocks as possible */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 76daea117181..5c5ba10384c2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -36,14 +36,19 @@
static inline int atomic_read(const atomic_t *v)
{
- barrier();
- return v->counter;
+ int c;
+
+ asm volatile(
+ " l %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
}
static inline void atomic_set(atomic_t *v, int i)
{
- v->counter = i;
- barrier();
+ asm volatile(
+ " st %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
}
static inline int atomic_add_return(int i, atomic_t *v)
@@ -128,14 +133,19 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
static inline long long atomic64_read(const atomic64_t *v)
{
- barrier();
- return v->counter;
+ long long c;
+
+ asm volatile(
+ " lg %0,%1\n"
+ : "=d" (c) : "Q" (v->counter));
+ return c;
}
static inline void atomic64_set(atomic64_t *v, long long i)
{
- v->counter = i;
- barrier();
+ asm volatile(
+ " stg %1,%0\n"
+ : "=Q" (v->counter) : "d" (i));
}
static inline long long atomic64_add_return(long long i, atomic64_t *v)
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index 24aafa68b643..2a30d5ac0667 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -13,6 +13,7 @@
#define L1_CACHE_BYTES 256
#define L1_CACHE_SHIFT 8
+#define NET_SKB_PAD 32
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5c5d02de49e9..81cf36b691f1 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -7,7 +7,7 @@
#include <linux/uaccess.h>
#include <asm/errno.h>
-static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -18,7 +18,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -39,13 +39,13 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr,
- int oldval, int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- return uaccess.futex_atomic_cmpxchg(uaddr, oldval, newval);
+ return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval);
}
#endif /* __KERNEL__ */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifndef __s390x__
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-
-/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index d6b1ed0ec52b..2d9ea11f919a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,8 +83,8 @@ struct uaccess_ops {
size_t (*clear_user)(size_t, void __user *);
size_t (*strnlen_user)(size_t, const char __user *);
size_t (*strncpy_from_user)(size_t, const char __user *, char *);
- int (*futex_atomic_op)(int op, int __user *, int oparg, int *old);
- int (*futex_atomic_cmpxchg)(int __user *, int old, int new);
+ int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old);
+ int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new);
};
extern struct uaccess_ops uaccess;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a68ac10213b2..1bc18cdb525b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -77,7 +77,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
- PERCPU(PAGE_SIZE)
+ PERCPU(0x100, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h
index 126011df14f1..1d2536cb630b 100644
--- a/arch/s390/lib/uaccess.h
+++ b/arch/s390/lib/uaccess.h
@@ -12,12 +12,12 @@ extern size_t copy_from_user_std(size_t, const void __user *, void *);
extern size_t copy_to_user_std(size_t, void __user *, const void *);
extern size_t strnlen_user_std(size_t, const char __user *);
extern size_t strncpy_from_user_std(size_t, const char __user *, char *);
-extern int futex_atomic_cmpxchg_std(int __user *, int, int);
-extern int futex_atomic_op_std(int, int __user *, int, int *);
+extern int futex_atomic_cmpxchg_std(u32 *, u32 __user *, u32, u32);
+extern int futex_atomic_op_std(int, u32 __user *, int, int *);
extern size_t copy_from_user_pt(size_t, const void __user *, void *);
extern size_t copy_to_user_pt(size_t, void __user *, const void *);
-extern int futex_atomic_op_pt(int, int __user *, int, int *);
-extern int futex_atomic_cmpxchg_pt(int __user *, int, int);
+extern int futex_atomic_op_pt(int, u32 __user *, int, int *);
+extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32);
#endif /* __ARCH_S390_LIB_UACCESS_H */
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 404f2de296dc..74833831417f 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,7 +302,7 @@ fault:
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc" );
-static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
@@ -335,7 +335,7 @@ static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old)
{
int ret;
@@ -354,26 +354,29 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
asm volatile("0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
+ "1: la %0,0\n"
"2:\n"
EX_TABLE(0b,2b) EX_TABLE(1b,2b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
+ *uval = oldval;
return ret;
}
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
if (segment_eq(get_fs(), KERNEL_DS))
- return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+ return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
spin_lock(&current->mm->page_table_lock);
uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
if (!uaddr) {
@@ -382,7 +385,7 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
}
get_page(virt_to_page(uaddr));
spin_unlock(&current->mm->page_table_lock);
- ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+ ret = __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval);
put_page(virt_to_page(uaddr));
return ret;
}
diff --git a/arch/s390/lib/uaccess_std.c b/arch/s390/lib/uaccess_std.c
index a6c4f7ed24a4..bb1a7eed42ce 100644
--- a/arch/s390/lib/uaccess_std.c
+++ b/arch/s390/lib/uaccess_std.c
@@ -255,7 +255,7 @@ size_t strncpy_from_user_std(size_t size, const char __user *src, char *dst)
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc");
-int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
+int futex_atomic_op_std(int op, u32 __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
@@ -287,19 +287,21 @@ int futex_atomic_op_std(int op, int __user *uaddr, int oparg, int *old)
return ret;
}
-int futex_atomic_cmpxchg_std(int __user *uaddr, int oldval, int newval)
+int futex_atomic_cmpxchg_std(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
int ret;
asm volatile(
" sacf 256\n"
"0: cs %1,%4,0(%5)\n"
- "1: lr %0,%1\n"
+ "1: la %0,0\n"
"2: sacf 0\n"
EX_TABLE(0b,2b) EX_TABLE(1b,2b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
+ *uval = oldval;
return ret;
}
diff --git a/arch/sh/include/asm/futex-irq.h b/arch/sh/include/asm/futex-irq.h
index a9f16a7f9aea..6cb9f193a95e 100644
--- a/arch/sh/include/asm/futex-irq.h
+++ b/arch/sh/include/asm/futex-irq.h
@@ -3,7 +3,7 @@
#include <asm/system.h>
-static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_set(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -20,7 +20,7 @@ static inline int atomic_futex_op_xchg_set(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_add(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -37,7 +37,7 @@ static inline int atomic_futex_op_xchg_add(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_or(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -54,7 +54,7 @@ static inline int atomic_futex_op_xchg_or(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_and(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -71,7 +71,7 @@ static inline int atomic_futex_op_xchg_and(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
+static inline int atomic_futex_op_xchg_xor(int oparg, u32 __user *uaddr,
int *oldval)
{
unsigned long flags;
@@ -88,11 +88,13 @@ static inline int atomic_futex_op_xchg_xor(int oparg, int __user *uaddr,
return ret;
}
-static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
- int oldval, int newval)
+static inline int atomic_futex_op_cmpxchg_inatomic(u32 *uval,
+ u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
unsigned long flags;
- int ret, prev = 0;
+ int ret;
+ u32 prev = 0;
local_irq_save(flags);
@@ -102,10 +104,8 @@ static inline int atomic_futex_op_cmpxchg_inatomic(int __user *uaddr,
local_irq_restore(flags);
- if (ret)
- return ret;
-
- return prev;
+ *uval = prev;
+ return ret;
}
#endif /* __ASM_SH_FUTEX_IRQ_H */
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 68256ec5fa35..7be39a646fbd 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -10,7 +10,7 @@
/* XXX: UP variants, fix for SH-4A and SMP.. */
#include <asm/futex-irq.h>
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -21,7 +21,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -65,12 +65,13 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- return atomic_futex_op_cmpxchg_inatomic(uaddr, oldval, newval);
+ return atomic_futex_op_cmpxchg_inatomic(uval, uaddr, oldval, newval);
}
#endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
#endif
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sh/include/asm/sections.h b/arch/sh/include/asm/sections.h
index a78701da775b..4a5350037c8f 100644
--- a/arch/sh/include/asm/sections.h
+++ b/arch/sh/include/asm/sections.h
@@ -3,7 +3,7 @@
#include <asm-generic/sections.h>
-extern void __nosave_begin, __nosave_end;
+extern long __nosave_begin, __nosave_end;
extern long __machvec_start, __machvec_end;
extern char __uncached_start, __uncached_end;
extern char _ebss[];
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 672944f5b19c..e53b4b38bd11 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -14,7 +14,7 @@
#include <linux/io.h>
#include <linux/sh_timer.h>
#include <linux/serial_sci.h>
-#include <asm/machtypes.h>
+#include <generated/machtypes.h>
static struct resource rtc_resources[] = {
[0] = {
@@ -255,12 +255,17 @@ static struct platform_device *sh7750_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
+ struct platform_device *dev[1];
+
if (mach_is_rts7751r2d()) {
scif_platform_data.scscr |= SCSCR_CKE1;
- early_platform_add_devices(&scif_device, 1);
+ dev[0] = &scif_device;
+ early_platform_add_devices(dev, 1);
} else {
- early_platform_add_devices(&sci_device, 1);
- early_platform_add_devices(&scif_device, 1);
+ dev[0] = &sci_device;
+ early_platform_add_devices(dev, 1);
+ dev[0] = &scif_device;
+ early_platform_add_devices(dev, 1);
}
early_platform_add_devices(sh7750_early_devices,
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 7f8a709c3ada..af4d46187a79 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -66,7 +66,7 @@ SECTIONS
__machvec_end = .;
}
- PERCPU(PAGE_SIZE)
+ PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
/*
* .exit.text is discarded at runtime, not link time, to deal with
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index faa8f86c0db4..0901b2f14e15 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -10,6 +10,16 @@
void __delay(unsigned long loops)
{
__asm__ __volatile__(
+ /*
+ * ST40-300 appears to have an issue with this code,
+ * normally taking two cycles each loop, as with all
+ * other SH variants. If however the branch and the
+ * delay slot straddle an 8 byte boundary, this increases
+ * to 3 cycles.
+ * This align directive ensures this doesn't occur.
+ */
+ ".balign 8\n\t"
+
"tst %0, %0\n\t"
"1:\t"
"bf/s 1b\n\t"
diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
index 88d3dc3d30d5..5a580ea04429 100644
--- a/arch/sh/mm/cache.c
+++ b/arch/sh/mm/cache.c
@@ -108,7 +108,8 @@ void copy_user_highpage(struct page *to, struct page *from,
kunmap_atomic(vfrom, KM_USER0);
}
- if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
+ if (pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK) ||
+ (vma->vm_flags & VM_EXEC))
__flush_purge_region(vto, PAGE_SIZE);
kunmap_atomic(vto, KM_USER1);
diff --git a/arch/sparc/include/asm/fcntl.h b/arch/sparc/include/asm/fcntl.h
index 38f37b333cc7..d0b83f66f356 100644
--- a/arch/sparc/include/asm/fcntl.h
+++ b/arch/sparc/include/asm/fcntl.h
@@ -34,6 +34,8 @@
#define __O_SYNC 0x800000
#define O_SYNC (__O_SYNC|O_DSYNC)
+#define O_PATH 0x1000000
+
#define F_GETOWN 5 /* for sockets. */
#define F_SETOWN 6 /* for sockets. */
#define F_GETLK 7
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
index 47f95839dc69..444e7bea23bc 100644
--- a/arch/sparc/include/asm/futex_64.h
+++ b/arch/sparc/include/asm/futex_64.h
@@ -30,7 +30,7 @@
: "r" (uaddr), "r" (oparg), "i" (-EFAULT) \
: "memory")
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -38,7 +38,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
- if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int))))
+ if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
return -EFAULT;
if (unlikely((((unsigned long) uaddr) & 0x3UL)))
return -EINVAL;
@@ -85,26 +85,30 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
+ int ret = 0;
+
__asm__ __volatile__(
- "\n1: casa [%3] %%asi, %2, %0\n"
+ "\n1: casa [%4] %%asi, %3, %1\n"
"2:\n"
" .section .fixup,#alloc,#execinstr\n"
" .align 4\n"
"3: sethi %%hi(2b), %0\n"
" jmpl %0 + %%lo(2b), %%g0\n"
- " mov %4, %0\n"
+ " mov %5, %0\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
" .align 4\n"
" .word 1b, 3b\n"
" .previous\n"
- : "=r" (newval)
- : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
+ : "+r" (ret), "=r" (newval)
+ : "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
: "memory");
- return newval;
+ *uval = newval;
+ return ret;
}
#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h
index a2f5c61f924e..843e4faf6a50 100644
--- a/arch/sparc/include/asm/pcr.h
+++ b/arch/sparc/include/asm/pcr.h
@@ -43,4 +43,6 @@ static inline u64 picl_value(unsigned int nmi_hz)
extern u64 pcr_enable;
+extern int pcr_arch_init(void);
+
#endif /* __PCR_H */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_ACTIVE_MASK 0xffffffffL
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
/*
* lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 47977a77f6c6..72509d0e34be 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -255,10 +255,9 @@ static inline iopte_t *alloc_npages(struct device *dev, struct iommu *iommu,
static int iommu_alloc_ctx(struct iommu *iommu)
{
int lowest = iommu->ctx_lowest_free;
- int sz = IOMMU_NUM_CTXS - lowest;
- int n = find_next_zero_bit(iommu->ctx_bitmap, sz, lowest);
+ int n = find_next_zero_bit(iommu->ctx_bitmap, IOMMU_NUM_CTXS, lowest);
- if (unlikely(n == sz)) {
+ if (unlikely(n == IOMMU_NUM_CTXS)) {
n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
if (unlikely(n == lowest)) {
printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c655..2cdc131b50ac 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
static irqreturn_t pcic_timer_handler (int irq, void *h)
{
- write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
pcic_clear_clock_irq();
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index ae96cf52a955..7c2ced612b8f 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -167,5 +167,3 @@ out_unregister:
unregister_perf_hsvc();
return err;
}
-
-early_initcall(pcr_arch_init);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b6a2b8f47040..555a76d1f4a1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -49,6 +49,7 @@
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
+#include <asm/pcr.h>
#include "cpumap.h"
@@ -1358,6 +1359,7 @@ void __cpu_die(unsigned int cpu)
void __init smp_cpus_done(unsigned int max_cpus)
{
+ pcr_arch_init();
}
void smp_send_reschedule(int cpu)
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 9c743b1886ff..4211bfc9bcad 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
profile_tick(CPU_PROFILING);
#endif
- /* Protect counter clear so that do_gettimeoffset works */
- write_seqlock(&xtime_lock);
-
clear_clock_irq();
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
index 8cc03458eb7e..8f096e84a937 100644
--- a/arch/sparc/kernel/una_asm_32.S
+++ b/arch/sparc/kernel/una_asm_32.S
@@ -24,9 +24,9 @@ retl_efault:
.globl __do_int_store
__do_int_store:
ld [%o2], %g1
- cmp %1, 2
+ cmp %o1, 2
be 2f
- cmp %1, 4
+ cmp %o1, 4
be 1f
srl %g1, 24, %g2
srl %g1, 16, %g7
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 0c1e6783657f..92b557afe535 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -108,7 +108,7 @@ SECTIONS
__sun4v_2insn_patch_end = .;
}
- PERCPU(PAGE_SIZE)
+ PERCPU(SMP_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
__init_end = .;
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
- [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+ [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
};
#else /* SMP */
diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c
index 764b3eb7b604..48d00e72ce15 100644
--- a/arch/sparc/lib/bitext.c
+++ b/arch/sparc/lib/bitext.c
@@ -10,7 +10,7 @@
*/
#include <linux/string.h>
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <asm/bitext.h>
@@ -80,8 +80,7 @@ int bit_map_string_get(struct bit_map *t, int len, int align)
while (test_bit(offset + i, t->map) == 0) {
i++;
if (i == len) {
- for (i = 0; i < len; i++)
- __set_bit(offset + i, t->map);
+ bitmap_set(t->map, offset, len);
if (offset == t->first_free)
t->first_free = find_next_zero_bit
(t->map, t->size,
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index fe0d10dcae57..d03ec124a598 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -29,16 +29,16 @@
#include <linux/uaccess.h>
#include <linux/errno.h>
-extern struct __get_user futex_set(int __user *v, int i);
-extern struct __get_user futex_add(int __user *v, int n);
-extern struct __get_user futex_or(int __user *v, int n);
-extern struct __get_user futex_andn(int __user *v, int n);
-extern struct __get_user futex_cmpxchg(int __user *v, int o, int n);
+extern struct __get_user futex_set(u32 __user *v, int i);
+extern struct __get_user futex_add(u32 __user *v, int n);
+extern struct __get_user futex_or(u32 __user *v, int n);
+extern struct __get_user futex_andn(u32 __user *v, int n);
+extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
#ifndef __tilegx__
-extern struct __get_user futex_xor(int __user *v, int n);
+extern struct __get_user futex_xor(u32 __user *v, int n);
#else
-static inline struct __get_user futex_xor(int __user *uaddr, int n)
+static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
{
struct __get_user asm_ret = __get_user_4(uaddr);
if (!asm_ret.err) {
@@ -53,7 +53,7 @@ static inline struct __get_user futex_xor(int __user *uaddr, int n)
}
#endif
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -65,7 +65,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -119,16 +119,17 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
- int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
struct __get_user asm_ret;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- return asm_ret.err ? asm_ret.err : asm_ret.val;
+ *uval = asm_ret.val;
+ return asm_ret.err;
}
#ifndef __tilegx__
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index 25fdc0c1839a..c6ce378e0678 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -63,7 +63,7 @@ SECTIONS
*(.init.page)
} :data =0
INIT_DATA_SECTION(16)
- PERCPU(PAGE_SIZE)
+ PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
VMLINUX_SYMBOL(_einitdata) = .;
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b4339..1e78940218c0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
config MMU
bool
diff --git a/arch/um/Kconfig.x86 b/arch/um/Kconfig.x86
index 5ee328099c63..02fb017fed47 100644
--- a/arch/um/Kconfig.x86
+++ b/arch/um/Kconfig.x86
@@ -10,6 +10,8 @@ endmenu
config UML_X86
def_bool y
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_FIND_NEXT_BIT
config 64BIT
bool
@@ -19,6 +21,9 @@ config X86_32
def_bool !64BIT
select HAVE_AOUT
+config X86_64
+ def_bool 64BIT
+
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 975613b23dcf..c70e047eed72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req)
#if 0
void mconsole_proc(struct mc_request *req)
{
- struct nameidata nd;
struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
struct file *file;
- int n, err;
+ int n;
char *ptr = req->request.data, *buf;
mm_segment_t old_fs = get_fs();
ptr += strlen("proc");
ptr = skip_spaces(ptr);
- err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd);
- if (err) {
- mconsole_reply(req, "Failed to look up file", 1, 0);
- goto out;
- }
-
- err = may_open(&nd.path, MAY_READ, O_RDONLY);
- if (result) {
- mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
- goto out;
- }
-
- file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
- current_cred());
- err = PTR_ERR(file);
+ file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
if (IS_ERR(file)) {
mconsole_reply(req, "Failed to open file", 1, 0);
- path_put(&nd.path);
goto out;
}
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.request = NULL, \
.start_sg = 0, \
.end_sg = 0, \
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index ac55b9efa1ce..34bede8aad4a 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -42,7 +42,7 @@
INIT_SETUP(0)
}
- PERCPU(32)
+ PERCPU(32, 32)
.initcall.init : {
INIT_CALLS
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c966..64cfea80cfe2 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
- raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ struct irq_desc *desc = irq_to_desc(i);
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->name);
+ seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
skip:
- raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS)
seq_putc(p, '\n');
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
EXPORT_SYMBOL(reactivate_fd);
/*
- * irq_chip must define (startup || enable) &&
- * (shutdown || disable) && end
+ * irq_chip must define at least enable/disable and ack when
+ * the edge handler is used.
*/
-static void dummy(unsigned int irq)
+static void dummy(struct irq_data *d)
{
}
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
static struct irq_chip normal_irq_type = {
.name = "SIGIO",
.release = free_irq_by_irq_and_dev,
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
static struct irq_chip SIGVTALRM_irq_type = {
.name = "SIGVTALRM",
.release = free_irq_by_irq_and_dev,
- .shutdown = dummy, /* never called */
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1359bc9f4fd3..e1f65c46bc93 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -64,8 +64,12 @@ config X86
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
select HAVE_SPARSE_IRQ
+ select GENERIC_FIND_FIRST_BIT
+ select GENERIC_FIND_NEXT_BIT
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
+ select GENERIC_IRQ_SHOW
+ select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
config INSTRUCTION_DECODER
@@ -378,6 +382,8 @@ config X86_INTEL_CE
depends on X86_32
depends on X86_EXTENDED_PLATFORM
select X86_REBOOTFIXUPS
+ select OF
+ select OF_EARLY_FLATTREE
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -807,7 +813,7 @@ config X86_LOCAL_APIC
config X86_IO_APIC
def_bool y
- depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
+ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
config X86_VISWS_APIC
def_bool y
@@ -1701,7 +1707,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
- def_bool X86_64
+ def_bool y
depends on NUMA
menu "Power management and ACPI options"
@@ -2062,9 +2068,10 @@ config SCx200HR_TIMER
config OLPC
bool "One Laptop Per Child support"
+ depends on !X86_PAE
select GPIOLIB
- select OLPC_OPENFIRMWARE
- depends on !X86_64 && !X86_PAE
+ select OF
+ select OF_PROMTREE if PROC_DEVICETREE
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2075,21 +2082,6 @@ config OLPC_XO1
---help---
Add support for non-essential features of the OLPC XO-1 laptop.
-config OLPC_OPENFIRMWARE
- bool "Support for OLPC's Open Firmware"
- depends on !X86_64 && !X86_PAE
- default n
- select OF
- help
- This option adds support for the implementation of Open Firmware
- that is used on the OLPC XO-1 Children's Machine.
- If unsure, say N here.
-
-config OLPC_OPENFIRMWARE_DT
- bool
- default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE
- select OF_PROMTREE
-
endif # X86_32
config AMD_NB
@@ -2134,6 +2126,11 @@ config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
+config KEYS_COMPAT
+ bool
+ depends on COMPAT && KEYS
+ default y
+
endmenu
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 283c5a6a03a6..ed47e6e1747f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -294,11 +294,6 @@ config X86_GENERIC
endif
-config X86_CPU
- def_bool y
- select GENERIC_FIND_FIRST_BIT
- select GENERIC_FIND_NEXT_BIT
-
#
# Define implied options from the CPU selection here
config X86_INTERNODE_CACHE_SHIFT
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 646aa78ba5fd..46a823882437 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -62,7 +62,12 @@ int main(int argc, char *argv[])
if (fseek(f, -4L, SEEK_END)) {
perror(argv[1]);
}
- fread(&olen, sizeof olen, 1, f);
+
+ if (fread(&olen, sizeof(olen), 1, f) != 1) {
+ perror(argv[1]);
+ return 1;
+ }
+
ilen = ftell(f);
olen = getle32(&olen);
fclose(f);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index e1e60c7d5813..e0e6340c8dad 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -873,22 +873,18 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
crypto_ablkcipher_clear_flags(ctr_tfm, ~0);
ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len);
- if (ret) {
- crypto_free_ablkcipher(ctr_tfm);
- return ret;
- }
+ if (ret)
+ goto out_free_ablkcipher;
+ ret = -ENOMEM;
req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL);
- if (!req) {
- crypto_free_ablkcipher(ctr_tfm);
- return -EINVAL;
- }
+ if (!req)
+ goto out_free_ablkcipher;
req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
- if (!req_data) {
- crypto_free_ablkcipher(ctr_tfm);
- return -ENOMEM;
- }
+ if (!req_data)
+ goto out_free_request;
+
memset(req_data->iv, 0, sizeof(req_data->iv));
/* Clear the data in the hash sub key container to zero.*/
@@ -913,8 +909,10 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
if (!ret)
ret = req_data->result.err;
}
- ablkcipher_request_free(req);
kfree(req_data);
+out_free_request:
+ ablkcipher_request_free(req);
+out_free_ablkcipher:
crypto_free_ablkcipher(ctr_tfm);
return ret;
}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..430312ba6e3f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -25,6 +25,8 @@
#define sysretl_audit ia32_ret_from_sys_call
#endif
+ .section .entry.text, "ax"
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %ebp,%ebp /* zero extension */
- pushq $__USER32_DS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_DS
/*CFI_REL_OFFSET ss,0*/
- pushq %rbp
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rbp
CFI_REL_OFFSET rsp,0
- pushfq
- CFI_ADJUST_CFA_OFFSET 8
+ pushfq_cfi
/*CFI_REL_OFFSET rflags,0*/
movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
CFI_REGISTER rip,r10
- pushq $__USER32_CS
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__USER32_CS
/*CFI_REL_OFFSET cs,0*/
movl %eax, %eax
- pushq %r10
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %r10
CFI_REL_OFFSET rip,0
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
@@ -182,11 +178,9 @@ sysexit_from_sys_call:
xorq %r9,%r9
xorq %r10,%r10
xorq %r11,%r11
- popfq
- CFI_ADJUST_CFA_OFFSET -8
+ popfq_cfi
/*CFI_RESTORE rflags*/
- popq %rcx /* User %esp */
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx /* User %esp */
CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
ENABLE_INTERRUPTS_SYSEXIT32
@@ -421,8 +415,7 @@ ENTRY(ia32_syscall)
*/
ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%eax
- pushq %rax
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax
cld
/* note the registers are not zero extended to the sf.
this could be a problem. */
@@ -851,4 +844,7 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad sys_name_to_handle_at
+ .quad compat_sys_open_by_handle_at
+ .quad compat_sys_clock_adjtime
ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 4784df504d28..448d73a371ba 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -89,6 +89,7 @@ extern int acpi_disabled;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
+extern int acpi_fix_pin2_polarity;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
@@ -187,15 +188,7 @@ struct bootnode;
#ifdef CONFIG_ACPI_NUMA
extern int acpi_numa;
-extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end);
-extern int acpi_scan_nodes(unsigned long start, unsigned long end);
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
-#ifdef CONFIG_NUMA_EMU
-extern void acpi_fake_nodes(const struct bootnode *fake_nodes,
- int num_nodes);
-#endif
+extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
#define acpi_unlazy_tlb(x) leave_mm(x)
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..e264ae5a1443 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range {
u8 dev_limit;
};
-extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
-extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
-extern int amd_scan_nodes(void);
-
-#ifdef CONFIG_NUMA_EMU
-extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
-extern void amd_get_nodes(struct bootnode *nodes);
-#endif
+extern int amd_numa_init(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, int);
struct amd_northbridge {
struct pci_dev *misc;
+ struct pci_dev *link;
};
struct amd_northbridge_info {
@@ -37,6 +34,7 @@ extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART 0x1
#define AMD_NB_L3_INDEX_DISABLE 0x2
+#define AMD_NB_L3_PARTITIONING 0x4
#ifdef CONFIG_AMD_NB
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c896946f4cc..a279d98ea95e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -220,7 +220,6 @@ extern void enable_IR_x2apic(void);
extern int get_physical_broadcast(void);
-extern void apic_disable(void);
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void connect_bsp_APIC(void);
@@ -228,7 +227,6 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
extern int verify_local_APIC(void);
-extern void cache_APIC_registers(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void setup_local_APIC(void);
@@ -239,8 +237,7 @@ void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
-extern void enable_NMI_through_LVT0(void);
-extern int apic_force_enable(void);
+extern int apic_force_enable(unsigned long addr);
/*
* On 32bit this is mach-xxx local
@@ -261,7 +258,6 @@ static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
static inline void init_apic_mappings(void) { }
static inline void disable_local_APIC(void) { }
-static inline void apic_disable(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
#endif /* !CONFIG_X86_LOCAL_APIC */
@@ -307,8 +303,6 @@ struct apic {
void (*setup_apic_routing)(void);
int (*multi_timer_check)(int apic, int irq);
- int (*apicid_to_node)(int logical_apicid);
- int (*cpu_to_logical_apicid)(int cpu);
int (*cpu_present_to_apicid)(int mps_cpu);
void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
void (*setup_portio_remap)(void);
@@ -356,6 +350,23 @@ struct apic {
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Called very early during boot from get_smp_config(). It should
+ * return the logical apicid. x86_[bios]_cpu_to_apicid is
+ * initialized before this function is called.
+ *
+ * If logical apicid can't be determined that early, the function
+ * may return BAD_APICID. Logical apicid will be configured after
+ * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
+ * won't be applied properly during early boot in this case.
+ */
+ int (*x86_32_early_logical_apicid)(int cpu);
+
+ /* determine CPU -> NUMA node mapping */
+ int (*x86_32_numa_cpu_node)(int cpu);
+#endif
};
/*
@@ -503,6 +514,11 @@ extern struct apic apic_noop;
extern struct apic apic_default;
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+ return BAD_APICID;
+}
+
/*
* Set up the logical destination ID.
*
@@ -522,7 +538,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
#endif
@@ -558,12 +574,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
*retmap = *phys_map;
}
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
static inline int __default_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -596,8 +606,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h
index 47a30ff8e517..d87988bacf3e 100644
--- a/arch/x86/include/asm/apicdef.h
+++ b/arch/x86/include/asm/apicdef.h
@@ -426,4 +426,16 @@ struct local_apic {
#else
#define BAD_APICID 0xFFFFu
#endif
+
+enum ioapic_irq_destination_types {
+ dest_Fixed = 0,
+ dest_LowestPrio = 1,
+ dest_SMI = 2,
+ dest__reserved_1 = 3,
+ dest_NMI = 4,
+ dest_INIT = 5,
+ dest__reserved_2 = 6,
+ dest_ExtINT = 7
+};
+
#endif /* _ASM_X86_APICDEF_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index c8bfe63a06de..e020d88ec02d 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -12,6 +12,7 @@
/* setup data types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
/* extensible setup data list node */
struct setup_data {
diff --git a/arch/x86/include/asm/ce4100.h b/arch/x86/include/asm/ce4100.h
new file mode 100644
index 000000000000..e656ad8c0a2e
--- /dev/null
+++ b/arch/x86/include/asm/ce4100.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_CE4100_H_
+#define _ASM_CE4100_H_
+
+int ce4100_pci_init(void);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 220e2ea08e80..91f3e087cf21 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -160,6 +160,7 @@
#define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
+#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index e99d55d74df5..908b96957d88 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -96,7 +96,7 @@ extern void e820_setup_gap(void);
extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
unsigned long start_addr, unsigned long long end_addr);
struct setup_data;
-extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data);
+extern void parse_e820_ext(struct setup_data *data);
#if defined(CONFIG_X86_64) || \
(defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f5..1cd6d26a0a8d 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
BUILD_INTERRUPT3(invalidate_interrupt\idx,
(INVALIDATE_TLB_VECTOR_START)+\idx,
smp_invalidate_interrupt)
+.endif
.endr
#endif
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 06850a7194e1..2c6fc9e62812 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -7,14 +7,12 @@
frame pointer later */
#ifdef CONFIG_FRAME_POINTER
.macro FRAME
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp,0
movl %esp,%ebp
.endm
.macro ENDFRAME
- popl %ebp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebp
CFI_RESTORE ebp
.endm
#else
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 1f11ce44e956..d09bb03653f0 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -37,7 +37,7 @@
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
-static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
+static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
return ret;
}
-static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
- int newval)
+static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
+ int ret = 0;
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
/* Real i386 machines have no cmpxchg instruction */
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
return -ENOSYS;
#endif
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+ if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n"
+ asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n"
"2:\t.section .fixup, \"ax\"\n"
- "3:\tmov %2, %0\n"
+ "3:\tmov %3, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE(1b, 3b)
- : "=a" (oldval), "+m" (*uaddr)
- : "i" (-EFAULT), "r" (newval), "0" (oldval)
+ : "+r" (ret), "=a" (oldval), "+m" (*uaddr)
+ : "i" (-EFAULT), "r" (newval), "1" (oldval)
: "memory"
);
- return oldval;
+ *uval = oldval;
+ return ret;
}
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e62..bb9efe8706e2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
extern void invalidate_interrupt5(void);
extern void invalidate_interrupt6(void);
extern void invalidate_interrupt7(void);
+extern void invalidate_interrupt8(void);
+extern void invalidate_interrupt9(void);
+extern void invalidate_interrupt10(void);
+extern void invalidate_interrupt11(void);
+extern void invalidate_interrupt12(void);
+extern void invalidate_interrupt13(void);
+extern void invalidate_interrupt14(void);
+extern void invalidate_interrupt15(void);
+extern void invalidate_interrupt16(void);
+extern void invalidate_interrupt17(void);
+extern void invalidate_interrupt18(void);
+extern void invalidate_interrupt19(void);
+extern void invalidate_interrupt20(void);
+extern void invalidate_interrupt21(void);
+extern void invalidate_interrupt22(void);
+extern void invalidate_interrupt23(void);
+extern void invalidate_interrupt24(void);
+extern void invalidate_interrupt25(void);
+extern void invalidate_interrupt26(void);
+extern void invalidate_interrupt27(void);
+extern void invalidate_interrupt28(void);
+extern void invalidate_interrupt29(void);
+extern void invalidate_interrupt30(void);
+extern void invalidate_interrupt31(void);
extern void irq_move_cleanup_interrupt(void);
extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 36fb1a6a5109..8dbe353e41e1 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start,
unsigned long page_size_mask);
-extern unsigned long __initdata e820_table_start;
-extern unsigned long __meminitdata e820_table_end;
-extern unsigned long __meminitdata e820_table_top;
+extern unsigned long __initdata pgt_buf_start;
+extern unsigned long __meminitdata pgt_buf_end;
+extern unsigned long __meminitdata pgt_buf_top;
#endif /* _ASM_X86_INIT_32_H */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index f327d386d6cc..c4bd267dfc50 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -63,17 +63,6 @@ union IO_APIC_reg_03 {
} __attribute__ ((packed)) bits;
};
-enum ioapic_irq_destination_types {
- dest_Fixed = 0,
- dest_LowestPrio = 1,
- dest_SMI = 2,
- dest__reserved_1 = 3,
- dest_NMI = 4,
- dest_INIT = 5,
- dest__reserved_2 = 6,
- dest_ExtINT = 7
-};
-
struct IO_APIC_route_entry {
__u32 vector : 8,
delivery_mode : 3, /* 000: FIXED
@@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry {
index : 15;
} __attribute__ ((packed));
+/*
+ * NOTE(review): presumably interrupt trigger-mode selectors (auto-detect,
+ * edge, level) - confirm against the users of these constants.
+ * The negative value is parenthesized so it expands safely in any expression.
+ */
+#define IOAPIC_AUTO (-1)
+#define IOAPIC_EDGE 0
+#define IOAPIC_LEVEL 1
+
#ifdef CONFIG_X86_IO_APIC
/*
@@ -150,11 +143,6 @@ extern int timer_through_8259;
#define io_apic_assign_pci_irqs \
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
-extern u8 io_apic_unique_id(u8 id);
-extern int io_apic_get_unique_id(int ioapic, int apic_id);
-extern int io_apic_get_version(int ioapic);
-extern int io_apic_get_redir_entries(int ioapic);
-
struct io_apic_irq_attr;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
@@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_and_gsi_init(void);
extern void ioapic_insert_resources(void);
+int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr);
+
extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
@@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void);
extern void mp_save_irq(struct mpc_intsrc *m);
+extern void disable_ioapic_support(void);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; }
struct io_apic_irq_attr;
static inline int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr) { return 0; }
+
+/*
+ * Stubs for builds without CONFIG_X86_IO_APIC: the allocator returns NULL,
+ * save/restore report -ENOMEM, and every other helper is a no-op.
+ */
+static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void)
+{
+	return NULL;
+}
+
+static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { }
+static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+	return -ENOMEM;
+}
+
+static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { }
+static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent)
+{
+	return -ENOMEM;
+}
+
+static inline void mp_save_irq(struct mpc_intsrc *m) { }
+static inline void disable_ioapic_support(void) { }
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a63..615fa9061b57 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
/* Avoid include hell */
#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
}
#ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector);
extern void default_send_IPI_mask_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index c704b38c57a2..ba870bb6dd8e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -10,9 +10,6 @@
#include <asm/apicdef.h>
#include <asm/irq_vectors.h>
-/* Even though we don't support this, supply it to appease OF */
-static inline void irq_dispose_mapping(unsigned int virq) { }
-
static inline int irq_canonicalize(int irq)
{
return ((irq == 2) ? 9 : irq);
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
new file mode 100644
index 000000000000..423bbbddf36d
--- /dev/null
+++ b/arch/x86/include/asm/irq_controller.h
@@ -0,0 +1,12 @@
+#ifndef __IRQ_CONTROLLER__
+#define __IRQ_CONTROLLER__
+
+/*
+ * struct irq_domain - per-controller interrupt translation record.
+ *
+ * @xlate:      callback taking the domain, an interrupt specifier
+ *              (@intspec, @intsize) and two output pointers; presumably it
+ *              fills in the hardware irq number and trigger type and returns
+ *              0 on success - confirm against the implementations.
+ * @priv:       opaque pointer for the owner of the domain.
+ * @controller: device-tree node associated with this domain (presumably the
+ *              interrupt controller's node - confirm).
+ * @l:          list linkage; which list it sits on is not visible here.
+ */
+struct irq_domain {
+ int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type);
+ void *priv;
+ struct device_node *controller;
+ struct list_head l;
+};
+
+#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb4..6e976ee3b3ef 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
+#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
@@ -16,8 +17,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... 237 : device interrupts
- * Vectors 238 ... 255 : special interrupts
+ * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
+ * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -96,37 +97,43 @@
#define THRESHOLD_APIC_VECTOR 0xf9
#define REBOOT_VECTOR 0xf8
-/* f0-f7 used for spreading out TLB flushes: */
-#define INVALIDATE_TLB_VECTOR_END 0xf7
-#define INVALIDATE_TLB_VECTOR_START 0xf0
-#define NUM_INVALIDATE_TLB_VECTORS 8
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
/*
* Generic system vector for platform specific use
*/
-#define X86_PLATFORM_IPI_VECTOR 0xed
+#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
-#define IRQ_WORK_VECTOR 0xec
+#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xea
+#define UV_BAU_MESSAGE 0xf5
/*
* Self IPI vector for machine checks
*/
-#define MCE_SELF_VECTOR 0xeb
+#define MCE_SELF_VECTOR 0xf4
/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xe9
+#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/*
+ * Up to 32 vectors used for spreading out TLB flushes: one per possible CPU
+ * (NR_CPUS), capped at 32.
+ *
+ * The range is anchored at its top, INVALIDATE_TLB_VECTOR_END (0xee, the
+ * vector just below LOCAL_TIMER_VECTOR), and extends downwards, so
+ * INVALIDATE_TLB_VECTOR_START varies with the configuration:
+ * START = END - NUM + 1, e.g. 0xcf when all 32 vectors are in use.
+ */
+#if NR_CPUS <= 32
+# define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS)
+#else
+# define NUM_INVALIDATE_TLB_VECTORS (32)
+#endif
+
+#define INVALIDATE_TLB_VECTOR_END (0xee)
+#define INVALIDATE_TLB_VECTOR_START \
+ (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1)
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index ca242d35e873..518bbbb9ee59 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -13,7 +13,6 @@ enum die_val {
DIE_PANIC,
DIE_NMI,
DIE_DIE,
- DIE_NMIWATCHDOG,
DIE_KERNELDEBUG,
DIE_TRAP,
DIE_GPF,
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f0505..9c7d95f6174b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
#define MAX_IRQ_SOURCES 256
extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
-#define MAX_APICID 256
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4d0dfa0d998e..823d48223400 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -36,6 +36,11 @@
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
+#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
@@ -47,6 +52,9 @@
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index c76f5b92b840..07f46016d3ff 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -7,7 +7,6 @@
#ifdef CONFIG_X86_LOCAL_APIC
-extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d3138..3d4dab43c994 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,57 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/topology.h>
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
+
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+ __apicid_to_node[apicid] = node;
+}
+#else /* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_X86_32
# include "numa_32.h"
#else
# include "numa_64.h"
#endif
+
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
+#else /* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node) { }
+static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
+static inline void numa_add_cpu(int cpu) { }
+static inline void numa_remove_cpu(int cpu) { }
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+#endif
+
+#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9d..c6beed1ef103 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
extern int numa_off;
extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
+
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int cpu);
+#else /* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607c..344eb1790b46 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,23 +2,16 @@
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
-#include <asm/apicdef.h>
struct bootnode {
u64 start;
u64 end;
};
-extern int compute_hash_shift(struct bootnode *nodes, int numblks,
- int *nodeids);
-
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-extern void numa_init_array(void);
extern int numa_off;
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
*/
#define NODE_MIN_SIZE (4*1024*1024)
-extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern int __cpuinit numa_cpu_node(int cpu);
+extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+extern void __init numa_set_distance(int from, int to, int distance);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
-static inline void init_cpu_to_node(void) { }
-static inline void numa_set_node(int cpu, int node) { }
-static inline void numa_clear_node(int cpu) { }
-static inline void numa_add_cpu(int cpu, int node) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h
index 641988efe063..c5d3a5abbb9f 100644
--- a/arch/x86/include/asm/olpc_ofw.h
+++ b/arch/x86/include/asm/olpc_ofw.h
@@ -6,7 +6,7 @@
#define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
extern bool olpc_ofw_is_installed(void);
@@ -26,19 +26,15 @@ extern void setup_olpc_ofw_pgd(void);
/* check if OFW was detected during boot */
extern bool olpc_ofw_present(void);
-#else /* !CONFIG_OLPC_OPENFIRMWARE */
-
-static inline bool olpc_ofw_is_installed(void) { return false; }
+#else /* !CONFIG_OLPC */
static inline void olpc_ofw_detect(void) { }
static inline void setup_olpc_ofw_pgd(void) { }
-static inline bool olpc_ofw_present(void) { return false; }
-
-#endif /* !CONFIG_OLPC_OPENFIRMWARE */
+#endif /* !CONFIG_OLPC */
-#ifdef CONFIG_OLPC_OPENFIRMWARE_DT
+#ifdef CONFIG_OF_PROMTREE
extern void olpc_dt_build_devicetree(void);
#else
static inline void olpc_dt_build_devicetree(void) { }
-#endif /* CONFIG_OLPC_OPENFIRMWARE_DT */
+#endif
#endif /* _ASM_X86_OLPC_OFW_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1b..bce688d54c12 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PAGE_DEFS_H
#include <linux/const.h>
+#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -45,11 +46,15 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+/*
+ * Convert max_pfn_mapped (a page frame number) into a physical address.
+ * The value is widened to phys_addr_t before the shift so the result is
+ * computed in the wider type where phys_addr_t is larger than unsigned long.
+ */
+static inline phys_addr_t get_max_mapped(void)
+{
+	phys_addr_t top_pfn = max_pfn_mapped;
+
+	return top_pfn << PAGE_SHIFT;
+}
+
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8);
+extern void initmem_init(void);
extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7e172955ee57..a09e1f052d84 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -451,6 +451,26 @@ do { \
#define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#endif /* !CONFIG_M386 */
+#ifdef CONFIG_X86_CMPXCHG64
+/*
+ * percpu_cmpxchg8b_double - compare-and-exchange the 8 bytes starting at the
+ * per-cpu location pcp1 with a single cmpxchg8b.
+ *
+ * o1/o2 are the expected values (loaded into eax/edx) and n1/n2 the
+ * replacement values (loaded into ebx/ecx). The macro evaluates to the
+ * setz result: non-zero when the comparison matched and the new values
+ * were stored, zero otherwise.
+ *
+ * NOTE(review): only pcp1 is an operand and the *_cmpxchg_double_4 wrappers
+ * drop pcp2, so this presumably relies on pcp2 being the word directly
+ * after pcp1 - confirm that callers guarantee this layout.
+ * NOTE(review): pcp1 is a write-only "=m" operand although cmpxchg8b also
+ * reads the location - check whether "+m" is required here.
+ */
+#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ typeof(o1) __o1 = o1; \
+ typeof(o1) __n1 = n1; \
+ typeof(o2) __o2 = o2; \
+ typeof(o2) __n2 = n2; \
+ typeof(o2) __dummy = n2; \
+ asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \
+ : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \
+ : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \
+ __ret; \
+})
+
+#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#endif /* CONFIG_X86_CMPXCHG64 */
+
/*
* Per cpu atomic 64 bit operations are only available under 64 bit.
* 32 bit must fall back to generic operations.
@@ -480,6 +500,34 @@ do { \
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval)
#define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
+
+/*
+ * percpu_cmpxchg16b_double - compare-and-exchange 16 bytes of per-cpu data
+ * starting at pcp1.
+ *
+ * The cmpxchg16b instruction is not supported on early AMD64 processors, so
+ * alternative_io() is given two sequences: a call to
+ * this_cpu_cmpxchg16b_emu (software emulation) and the native
+ * "cmpxchg16b %gs:(%rsi); setz" used when X86_FEATURE_CX16 is available.
+ * The address used in the cmpxchg16b instruction must be aligned to a
+ * 16 byte boundary.
+ *
+ * Register usage: rsi = &pcp1, rax/rdx = expected values o1/o2,
+ * rbx/rcx = new values n1/n2. The macro evaluates to the byte left in al
+ * (__ret); rdx is declared as clobbered through __dummy.
+ *
+ * NOTE(review): on the emulation path the result in al is presumably set by
+ * this_cpu_cmpxchg16b_emu itself - confirm against its implementation.
+ */
+#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \
+({ \
+ char __ret; \
+ typeof(o1) __o1 = o1; \
+ typeof(o1) __n1 = n1; \
+ typeof(o2) __o2 = o2; \
+ typeof(o2) __n2 = n2; \
+ typeof(o2) __dummy; \
+ alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \
+ "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \
+ X86_FEATURE_CX16, \
+ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \
+ "S" (&pcp1), "b"(__n1), "c"(__n2), \
+ "a"(__o1), "d"(__o2)); \
+ __ret; \
+})
+
+#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+
#endif
/* This is not atomic against other CPUs -- CPU preemption needs to be off */
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index e2f6a99f14ab..cc29086e30cd 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -22,6 +22,7 @@
#define ARCH_P4_CNTRVAL_BITS (40)
#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
+#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index b4ec95f07518..971e0b46446e 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -1 +1,69 @@
-/* dummy prom.h; here to make linux/of.h's #includes happy */
+/*
+ * Definitions for Device tree / OpenFirmware handling on X86
+ *
+ * based on arch/powerpc/include/asm/prom.h which is
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_X86_PROM_H
+#define _ASM_X86_PROM_H
+#ifndef __ASSEMBLY__
+
+#include <linux/of.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#include <asm/irq.h>
+#include <asm/atomic.h>
+#include <asm/setup.h>
+#include <asm/irq_controller.h>
+
+#ifdef CONFIG_OF
+extern int of_ioapic;
+extern u64 initial_dtb;
+extern void add_dtb(u64 data);
+extern void x86_add_irq_domains(void);
+void __cpuinit x86_of_pci_init(void);
+void x86_dtb_init(void);
+
+/*
+ * Return the device-tree node recorded in pdev->dev.of_node, or NULL when
+ * no PCI device is given.
+ */
+static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev)
+{
+	if (!pdev)
+		return NULL;
+
+	return pdev->dev.of_node;
+}
+
+/*
+ * Return the device-tree node of the device stored in bus->self, or NULL
+ * when bus->self is NULL. @bus itself is dereferenced unconditionally and
+ * must not be NULL.
+ */
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+	struct pci_dev *self_dev = bus->self;
+
+	return pci_device_to_OF_node(self_dev);
+}
+
+#else
+static inline void add_dtb(u64 data) { }
+static inline void x86_add_irq_domains(void) { }
+static inline void x86_of_pci_init(void) { }
+static inline void x86_dtb_init(void) { }
+#define of_ioapic 0
+#endif
+
+extern char cmd_line[COMMAND_LINE_SIZE];
+
+#define pci_address_to_pio pci_address_to_pio
+unsigned long pci_address_to_pio(phys_addr_t addr);
+
+/**
+ * irq_dispose_mapping - Unmap an interrupt
+ * @virq: linux virq number of the interrupt to unmap
+ *
+ * FIXME: We really should implement proper virq handling like power,
+ * but that's going to be major surgery.
+ */
+static inline void irq_dispose_mapping(unsigned int virq) { }
+
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f4695136776..73b11bc0ae6f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,12 +17,24 @@
#endif
#include <asm/thread_info.h>
#include <asm/cpumask.h>
+#include <asm/cpufeature.h>
extern int smp_num_siblings;
extern unsigned int num_processors;
+/*
+ * True only on CONFIG_SMP builds where cpu_has_ht is set and more than one
+ * sibling was counted (smp_num_siblings > 1); always false otherwise.
+ */
+static inline bool cpu_has_ht_siblings(void)
+{
+#ifdef CONFIG_SMP
+	return cpu_has_ht && smp_num_siblings > 1;
+#else
+	return false;
+#endif
+}
+
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -36,8 +48,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+/* Return the per-cpu cpu_llc_shared_map mask belonging to @cpu. */
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+	struct cpumask *llc_mask = per_cpu(cpu_llc_shared_map, cpu);
+
+	return llc_mask;
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/smpboot_hooks.h b/arch/x86/include/asm/smpboot_hooks.h
index 6c22bf353f26..725b77831993 100644
--- a/arch/x86/include/asm/smpboot_hooks.h
+++ b/arch/x86/include/asm/smpboot_hooks.h
@@ -34,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void)
*/
CMOS_WRITE(0, 0xf);
- *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0;
+ *((volatile u32 *)phys_to_virt(apic->trampoline_phys_low)) = 0;
}
static inline void __init smpboot_setup_io_apic(void)
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 33ecc3ea8782..12569e691ce3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -98,8 +98,6 @@ do { \
*/
#define HAVE_DISABLE_HLT
#else
-#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
-#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
/* frame pointer must be last for get_wchan */
#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e52..910a7084f7f2 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
- return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#endif /* CONFIG_X86_64 */
-
/* Mappings between node number and cpus on that node. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[];
.balance_interval = 1, \
}
-#ifdef CONFIG_X86_64_ACPI_NUMA
+#ifdef CONFIG_X86_64
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
#endif
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..ffaf183c619a 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,13 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_name_to_handle_at 341
+#define __NR_open_by_handle_at 342
+#define __NR_clock_adjtime 343
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 344
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..5466bea670e7 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_name_to_handle_at 303
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 304
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+#define __NR_clock_adjtime 305
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index ce1d54c8a433..3e094af443c3 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -176,7 +176,7 @@ struct bau_msg_payload {
struct bau_msg_header {
unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */
/* bits 5:0 */
- unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */
+ unsigned int base_dest_nodeid:15; /* nasid of the */
/* bits 20:6 */ /* first bit in uvhub map */
unsigned int command:8; /* message type */
/* bits 28:21 */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 64642ad019fb..643ebf2e2ad8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -83,11 +83,13 @@ struct x86_init_paging {
* boot cpu
* @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
+ * @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
void (*tsc_pre_init)(void);
void (*timer_init)(void);
+ void (*wallclock_init)(void);
};
/**
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a3c28ae4025b..8508bfe52296 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set)
static inline int
HYPERVISOR_sched_op(int cmd, void *arg)
{
- return _hypercall2(int, sched_op_new, cmd, arg);
+ return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value)
#endif
static inline int
-HYPERVISOR_suspend(unsigned long srec)
+HYPERVISOR_suspend(unsigned long start_info_mfn)
{
- return _hypercall3(int, sched_op, SCHEDOP_shutdown,
- SHUTDOWN_suspend, srec);
+ struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
+
+ /*
+ * For a PV guest the tools require that the start_info mfn be
+ * present in rdx/edx when the hypercall is made. Per the
+ * hypercall calling convention this is the third hypercall
+ * argument, which is start_info_mfn here.
+ */
+ return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn);
}
static inline int
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index f25bdf238a33..c61934fbf22a 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -29,8 +29,10 @@ typedef struct xpaddr {
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
-#define FOREIGN_FRAME_BIT (1UL<<31)
+#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1))
+#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2))
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT)
/* Maximum amount of memory we can handle in a domain in pages */
#define MAX_DOMAIN_PAGES \
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+extern unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e);
extern int m2p_add_override(unsigned long mfn, struct page *page);
extern int m2p_remove_override(struct page *page);
extern struct page *m2p_find_override(unsigned long mfn);
extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
+#ifdef CONFIG_XEN_DEBUG_FS
+extern int p2m_dump_show(struct seq_file *m, void *v);
+#endif
static inline unsigned long pfn_to_mfn(unsigned long pfn)
{
unsigned long mfn;
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn)
mfn = get_phys_to_machine(pfn);
if (mfn != INVALID_P2M_ENTRY)
- mfn &= ~FOREIGN_FRAME_BIT;
+ mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
return mfn;
}
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn)
static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
unsigned long pfn;
+ int ret = 0;
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
+ if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ pfn = ~0;
+ goto try_override;
+ }
pfn = 0;
/*
* The array access can fail (e.g., device space beyond end of RAM).
* In such cases it doesn't matter what we return (we return garbage),
* but we must handle the fault without crashing!
*/
- __get_user(pfn, &machine_to_phys_mapping[mfn]);
-
- /*
- * If this appears to be a foreign mfn (because the pfn
- * doesn't map back to the mfn), then check the local override
- * table to see if there's a better pfn to use.
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+try_override:
+ /* ret might be < 0 if there are no entries in the m2p for mfn */
+ if (ret < 0)
+ pfn = ~0;
+ else if (get_phys_to_machine(pfn) != mfn)
+ /*
+ * If this appears to be a foreign mfn (because the pfn
+ * doesn't map back to the mfn), then check the local override
+ * table to see if there's a better pfn to use.
+ *
+ * m2p_find_override_pfn returns ~0 if it doesn't find anything.
+ */
+ pfn = m2p_find_override_pfn(mfn, ~0);
+
+ /*
+ * pfn is ~0 if there are no entries in the m2p for mfn or if the
+ * entry doesn't map back to the mfn and m2p_override doesn't have a
+ * valid entry for it.
*/
- if (get_phys_to_machine(pfn) != mfn)
- pfn = m2p_find_override_pfn(mfn, pfn);
+ if (pfn == ~0 &&
+ get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn))
+ pfn = mfn;
return pfn;
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 2329b3eaf8d3..aa8620989162 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void)
* its own functions.
*/
struct xen_pci_frontend_ops {
- int (*enable_msi)(struct pci_dev *dev, int **vectors);
+ int (*enable_msi)(struct pci_dev *dev, int vectors[]);
void (*disable_msi)(struct pci_dev *dev);
- int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec);
+ int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec);
void (*disable_msix)(struct pci_dev *dev);
};
extern struct xen_pci_frontend_ops *xen_pci_frontend;
static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
- int **vectors)
+ int vectors[])
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
xen_pci_frontend->disable_msi(dev);
}
static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
- int **vectors, int nvec)
+ int vectors[], int nvec)
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 778c5b93676d..743642f1a36c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -67,9 +67,9 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
+obj-$(CONFIG_SMP) += tsc_sync.o
obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
@@ -109,6 +109,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
+obj-$(CONFIG_OF) += devicetree.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983a..9a966c579af5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -72,6 +72,7 @@ u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
int acpi_skip_timer_override __initdata;
int acpi_use_timer_override __initdata;
+int acpi_fix_pin2_polarity __initdata;
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
@@ -415,10 +416,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (acpi_skip_timer_override &&
- intsrc->source_irq == 0 && intsrc->global_irq == 2) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
- return 0;
+ if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (acpi_skip_timer_override) {
+ printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ return 0;
+ }
+ if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+ intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
+ printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
+ }
}
mp_override_legacy_irq(intsrc->source_irq,
@@ -589,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
nid = acpi_get_node(handle);
if (nid == -1 || !node_online(nid))
return;
-#ifdef CONFIG_X86_64
- apicid_to_node[physid] = nid;
+ set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
- apicid_2_node[physid] = nid;
- cpu_to_node_map[cpu] = nid;
-#endif
-
#endif
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..ed3c2e5b714a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
static u32 *flush_words;
-struct pci_device_id amd_nb_misc_ids[] = {
+const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+static struct pci_device_id amd_nb_link_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+ {}
+};
+
const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ 0x00, 0x18, 0x20 },
{ 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
static struct pci_dev *next_northbridge(struct pci_dev *dev,
- struct pci_device_id *ids)
+ const struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
{
int i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc;
+ struct pci_dev *misc, *link;
if (amd_nb_num())
return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- misc = NULL;
+ link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
+ node_to_amd_nb(i)->link = link =
+ next_northbridge(link, amd_nb_link_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
+ /* L3 cache partitioning is supported on family 0x15 */
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
+
return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
they're useless anyways */
int __init early_is_amd_nb(u32 device)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
u32 vendor = device & 0xffff;
+
device >>= 16;
for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
return 0;
}
+int amd_get_subcaches(int cpu)
+{
+ struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+ unsigned int mask;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return 0;
+
+ pci_read_config_dword(link, 0x1d4, &mask);
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ return (mask >> (4 * cuid)) & 0xf;
+}
+
+int amd_set_subcaches(int cpu, int mask)
+{
+ static unsigned int reset, ban;
+ struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ unsigned int reg;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
+ return -EINVAL;
+
+ /* if necessary, collect reset state of L3 partitioning and BAN mode */
+ if (reset == 0) {
+ pci_read_config_dword(nb->link, 0x1d4, &reset);
+ pci_read_config_dword(nb->misc, 0x1b8, &ban);
+ ban &= 0x180000;
+ }
+
+ /* deactivate BAN mode if any subcaches are to be disabled */
+ if (mask != 0xf) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
+ }
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ mask <<= 4 * cuid;
+ mask |= (0xf ^ (1 << cuid)) << 26;
+
+ pci_write_config_dword(nb->link, 0x1d4, mask);
+
+ /* reset BAN mode if L3 partitioning returned to reset state */
+ pci_read_config_dword(nb->link, 0x1d4, &reg);
+ if (reg == reset) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ reg &= ~0x180000;
+ pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
+ }
+
+ return 0;
+}
+
int amd_cache_gart(void)
{
int i;
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 51ef31a89be9..1293c709ee85 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -284,7 +284,7 @@ static int __init apbt_clockevent_register(void)
memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device));
if (mrst_timer_options == MRST_TIMER_LAPIC_APBT) {
- apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100;
+ adev->evt.rating = APBT_CLOCKEVENT_RATING - 100;
global_clock_event = &adev->evt;
printk(KERN_DEBUG "%s clockevent registered as global\n",
global_clock_event->name);
@@ -508,64 +508,12 @@ static int apbt_next_event(unsigned long delta,
return 0;
}
-/*
- * APB timer clock is not in sync with pclk on Langwell, which translates to
- * unreliable read value caused by sampling error. the error does not add up
- * overtime and only happens when sampling a 0 as a 1 by mistake. so the time
- * would go backwards. the following code is trying to prevent time traveling
- * backwards. little bit paranoid.
- */
static cycle_t apbt_read_clocksource(struct clocksource *cs)
{
- unsigned long t0, t1, t2;
- static unsigned long last_read;
-
-bad_count:
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if (unlikely(t1 < t2)) {
- pr_debug("APBT: read current count error %lx:%lx:%lx\n",
- t1, t2, t2 - t1);
- goto bad_count;
- }
- /*
- * check against cached last read, makes sure time does not go back.
- * it could be a normal rollover but we will do tripple check anyway
- */
- if (unlikely(t2 > last_read)) {
- /* check if we have a normal rollover */
- unsigned long raw_intr_status =
- apbt_readl_reg(APBTMRS_RAW_INT_STATUS);
- /*
- * cs timer interrupt is masked but raw intr bit is set if
- * rollover occurs. then we read EOI reg to clear it.
- */
- if (raw_intr_status & (1 << phy_cs_timer_id)) {
- apbt_readl(phy_cs_timer_id, APBTMR_N_EOI);
- goto out;
- }
- pr_debug("APB CS going back %lx:%lx:%lx ",
- t2, last_read, t2 - last_read);
-bad_count_x3:
- pr_debug("triple check enforced\n");
- t0 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t1 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- udelay(1);
- t2 = apbt_readl(phy_cs_timer_id,
- APBTMR_N_CURRENT_VALUE);
- if ((t2 > t1) || (t1 > t0)) {
- printk(KERN_ERR "Error: APB CS tripple check failed\n");
- goto bad_count_x3;
- }
- }
-out:
- last_read = t2;
- return (cycle_t)~t2;
+ unsigned long current_count;
+
+ current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE);
+ return (cycle_t)~current_count;
}
static int apbt_clocksource_register(void)
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a96..7b1e8e10b89c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
u32 aper_size;
- void *p;
+ unsigned long addr;
/* aper_size should <= 1G */
if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
* so don't use 512M below as gart iommu, leave the space for kernel
* code for safe
*/
- p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
+ if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+ printk(KERN_ERR
+ "Cannot allocate aperture memory hole (%lx,%uK)\n",
+ addr, aper_size>>10);
+ return 0;
+ }
+ memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
- kmemleak_ignore(p);
- if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk(KERN_ERR
- "Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
- if (p)
- free_bootmem(__pa(p), aper_size);
- return 0;
- }
+ kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
- insert_aperture_resource((u32)__pa(p), aper_size);
- register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
- (u32)__pa(p+aper_size) >> PAGE_SHIFT);
+ aper_size >> 10, addr);
+ insert_aperture_resource((u32)addr, aper_size);
+ register_nosave_region(addr >> PAGE_SHIFT,
+ (addr+aper_size) >> PAGE_SHIFT);
- return (u32)__pa(p);
+ return (u32)addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978a..966673f44141 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -43,6 +43,7 @@
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/idle.h>
@@ -78,12 +79,21 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
/*
* Knob to control our willingness to enable the local APIC.
*
* +1=force-enable
*/
-static int force_enable_local_apic;
+static int force_enable_local_apic __initdata;
/*
* APIC command line parameters
*/
@@ -153,7 +163,7 @@ early_param("nox2apic", setup_nox2apic);
unsigned long mp_lapic_addr;
int disable_apic;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
+static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -177,29 +187,8 @@ static struct resource lapic_resource = {
static unsigned int calibration_result;
-static int lapic_next_event(unsigned long delta,
- struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
- struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
static void apic_pm_activate(void);
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
- .name = "lapic",
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
- | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
- .shift = 32,
- .set_mode = lapic_timer_setup,
- .set_next_event = lapic_next_event,
- .broadcast = lapic_timer_broadcast,
- .rating = 100,
- .irq = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
static unsigned long apic_phys;
/*
@@ -238,7 +227,7 @@ static int modern_apic(void)
* right after this call apic become NOOP driven
* so apic->write/read doesn't do anything
*/
-void apic_disable(void)
+static void __init apic_disable(void)
{
pr_info("APIC: switched to apic NOOP\n");
apic = &apic_noop;
@@ -282,23 +271,6 @@ u64 native_apic_icr_read(void)
return icr1 | ((u64)icr2 << 32);
}
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
- unsigned int v;
-
- /* unmask and set to NMI */
- v = APIC_DM_NMI;
-
- /* Level triggered for 82489DX (32bit mode) */
- if (!lapic_is_integrated())
- v |= APIC_LVT_LEVEL_TRIGGER;
-
- apic_write(APIC_LVT0, v);
-}
-
#ifdef CONFIG_X86_32
/**
* get_physical_broadcast - Get number of physical broadcast IDs
@@ -508,6 +480,23 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
#endif
}
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+ .name = "lapic",
+ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+ | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+ .broadcast = lapic_timer_broadcast,
+ .rating = 100,
+ .irq = -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
/*
* Setup the local APIC timer for this CPU. Copy the initialized values
* of the boot CPU and register the clock event in the framework.
@@ -1209,7 +1198,7 @@ void __cpuinit setup_local_APIC(void)
rdtscll(tsc);
if (disable_apic) {
- arch_disable_smp_support();
+ disable_ioapic_support();
return;
}
@@ -1237,6 +1226,19 @@ void __cpuinit setup_local_APIC(void)
*/
apic->init_apic_ldr();
+#ifdef CONFIG_X86_32
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches the
+ * actual value.
+ */
+ i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ /* always use the value from LDR */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ logical_smp_processor_id();
+#endif
+
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
@@ -1448,7 +1450,7 @@ int __init enable_IR(void)
void __init enable_IR_x2apic(void)
{
unsigned long flags;
- struct IO_APIC_route_entry **ioapic_entries = NULL;
+ struct IO_APIC_route_entry **ioapic_entries;
int ret, x2apic_enabled = 0;
int dmar_table_init_ret;
@@ -1537,7 +1539,7 @@ static int __init detect_init_APIC(void)
}
#else
-static int apic_verify(void)
+static int __init apic_verify(void)
{
u32 features, h, l;
@@ -1562,7 +1564,7 @@ static int apic_verify(void)
return 0;
}
-int apic_force_enable(void)
+int __init apic_force_enable(unsigned long addr)
{
u32 h, l;
@@ -1578,7 +1580,7 @@ int apic_force_enable(void)
if (!(l & MSR_IA32_APICBASE_ENABLE)) {
pr_info("Local APIC disabled by BIOS -- reenabling.\n");
l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | addr;
wrmsr(MSR_IA32_APICBASE, l, h);
enabled_via_apicbase = 1;
}
@@ -1619,7 +1621,7 @@ static int __init detect_init_APIC(void)
"you can enable it with \"lapic\"\n");
return -1;
}
- if (apic_force_enable())
+ if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
return -1;
} else {
if (apic_verify())
@@ -1930,17 +1932,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- /*
- * Validate version
- */
- if (version == 0x0) {
- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
if (num_processors >= nr_cpu_ids) {
int max = nr_cpu_ids;
int thiscpu = max + disabled_cpus;
@@ -1954,22 +1945,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
num_processors++;
- cpu = cpumask_next_zero(-1, cpu_present_mask);
-
- if (version != apic_version[boot_cpu_physical_apicid])
- WARN_ONCE(1,
- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
-
- physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
*/
cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
}
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
@@ -1977,7 +1980,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
-
+#ifdef CONFIG_X86_32
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ apic->x86_32_early_logical_apicid(cpu);
+#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -1998,10 +2004,14 @@ void default_init_apic_ldr(void)
}
#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
#else
return 0;
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..5652d31fe108 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..f1baa2dc087a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
return 0;
}
-static int noop_cpu_to_logical_apicid(int cpu)
-{
- return 0;
-}
-
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-int noop_apicid_to_node(int logical_apicid)
-{
- /* we're always on node 0 */
- return 0;
-}
-
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+#endif
+
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = noop_apicid_to_node,
- .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
@@ -197,4 +192,9 @@ struct apic apic_noop = {
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
.safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
+#endif
};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..541a2e431659 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
return 1;
}
+static int bigsmp_early_logical_apicid(int cpu)
+{
+ /* on bigsmp, logical apicid is the same as physical */
+ return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
static inline unsigned long calculate_ldr(int cpu)
{
unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
nr_ioapics);
}
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
- return apicid_2_node[hard_smp_processor_id()];
-}
-
static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_physical_id(cpu);
-}
-
static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
/* As we are using single CPU as destination, pick only one CPU here */
static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+ int cpu = cpumask_first(cpumask);
+
+ if (cpu < nr_cpu_ids)
+ return cpu_physical_id(cpu);
+ return BAD_APICID;
}
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
*/
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
+ return cpu_physical_id(cpu);
}
- return bigsmp_cpu_to_logical_apicid(cpu);
+ return BAD_APICID;
}
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = bigsmp_apicid_to_node,
- .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..3e9de4854c5b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
+static int es7000_early_logical_apicid(int cpu)
+{
+ /* on es7000, logical apicid is the same as physical */
+ return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
static unsigned long calculate_ldr(int cpu)
{
unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
-
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
++cpu_id;
}
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = es7000_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 79fd43ca6f96..c4e557a1ebb6 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
arch_spin_lock(&lock);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
- dump_stack();
arch_spin_unlock(&lock);
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return NOTIFY_STOP;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index ca9e2a3545a9..4b5ebd26f565 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
int skip_ioapic_setup;
-void arch_disable_smp_support(void)
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+void disable_ioapic_support(void)
{
#ifdef CONFIG_PCI
noioapicquirk = 1;
@@ -120,11 +123,14 @@ void arch_disable_smp_support(void)
static int __init parse_noapic(char *str)
{
/* disable IO-APIC */
- arch_disable_smp_support();
+ disable_ioapic_support();
return 0;
}
early_param("noapic", parse_noapic);
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr);
+
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
@@ -181,7 +187,7 @@ int __init arch_early_irq_init(void)
irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
for (i = 0; i < count; i++) {
- set_irq_chip_data(i, &cfg[i]);
+ irq_set_chip_data(i, &cfg[i]);
zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
@@ -200,7 +206,7 @@ int __init arch_early_irq_init(void)
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg *irq_cfg(unsigned int irq)
{
- return get_irq_chip_data(irq);
+ return irq_get_chip_data(irq);
}
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
@@ -226,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
if (!cfg)
return;
- set_irq_chip_data(at, NULL);
+ irq_set_chip_data(at, NULL);
free_cpumask_var(cfg->domain);
free_cpumask_var(cfg->old_domain);
kfree(cfg);
@@ -256,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
if (res < 0) {
if (res != -EEXIST)
return NULL;
- cfg = get_irq_chip_data(at);
+ cfg = irq_get_chip_data(at);
if (cfg)
return cfg;
}
cfg = alloc_irq_cfg(at, node);
if (cfg)
- set_irq_chip_data(at, cfg);
+ irq_set_chip_data(at, cfg);
else
irq_free_desc(at);
return cfg;
@@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq)
#define default_MCA_trigger(idx) (1)
#define default_MCA_polarity(idx) default_ISA_polarity(idx)
-static int MPBIOS_polarity(int idx)
+static int irq_polarity(int idx)
{
int bus = mp_irqs[idx].srcbus;
int polarity;
@@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx)
return polarity;
}
-static int MPBIOS_trigger(int idx)
+static int irq_trigger(int idx)
{
int bus = mp_irqs[idx].srcbus;
int trigger;
@@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx)
return trigger;
}
-static inline int irq_polarity(int idx)
-{
- return MPBIOS_polarity(idx);
-}
-
-static inline int irq_trigger(int idx)
-{
- return MPBIOS_trigger(idx);
-}
-
static int pin_2_irq(int idx, int apic, int pin)
{
int irq;
@@ -1189,7 +1185,7 @@ void __setup_vector_irq(int cpu)
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_active_irq(irq) {
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
/*
@@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu)
static struct irq_chip ioapic_chip;
static struct irq_chip ir_ioapic_chip;
-#define IOAPIC_AUTO -1
-#define IOAPIC_EDGE 0
-#define IOAPIC_LEVEL 1
-
#ifdef CONFIG_X86_32
static inline int IO_APIC_irq_trigger(int irq)
{
@@ -1248,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq)
}
#endif
-static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+ unsigned long trigger)
{
+ struct irq_chip *chip = &ioapic_chip;
+ irq_flow_handler_t hdl;
+ bool fasteoi;
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
+ trigger == IOAPIC_LEVEL) {
irq_set_status_flags(irq, IRQ_LEVEL);
- else
+ fasteoi = true;
+ } else {
irq_clear_status_flags(irq, IRQ_LEVEL);
+ fasteoi = false;
+ }
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (trigger)
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
- handle_edge_irq, "edge");
- return;
+ chip = &ir_ioapic_chip;
+ fasteoi = trigger != 0;
}
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_fasteoi_irq,
- "fasteoi");
- else
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
- handle_edge_irq, "edge");
+ hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+ irq_set_chip_and_handler_name(irq, chip, hdl,
+ fasteoi ? "fasteoi" : "edge");
}
static int setup_ioapic_entry(int apic_id, int irq,
@@ -1374,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
return;
}
- ioapic_register_intr(irq, trigger);
+ ioapic_register_intr(irq, cfg, trigger);
if (irq < legacy_pic->nr_legacy_irqs)
legacy_pic->mask(irq);
@@ -1385,33 +1373,26 @@ static struct {
DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];
-static void __init setup_IO_APIC_irqs(void)
+static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
{
- int apic_id, pin, idx, irq, notcon = 0;
- int node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ if (idx != -1)
+ return false;
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return true;
+}
+
+static void __init __io_apic_setup_irqs(unsigned int apic_id)
+{
+ int idx, node = cpu_to_node(0);
+ struct io_apic_irq_attr attr;
+ unsigned int pin, irq;
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
+ if (io_apic_pin_not_connected(idx, apic_id, pin))
continue;
- }
- if (notcon) {
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
irq = pin_2_irq(idx, apic_id, pin);
@@ -1423,25 +1404,24 @@ static void __init setup_IO_APIC_irqs(void)
* installed and if it returns 1:
*/
if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
+ apic->multi_timer_check(apic_id, irq))
continue;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- continue;
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- add_pin_to_irq_node(cfg, node, apic_id, pin);
- /*
- * don't mark it in pin_programmed, so later acpi could
- * set it correctly when irq < 16
- */
- setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
- irq_polarity(idx));
+ io_apic_setup_irq_pin(irq, node, &attr);
}
+}
- if (notcon)
- apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
+static void __init setup_IO_APIC_irqs(void)
+{
+ unsigned int apic_id;
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+ __io_apic_setup_irqs(apic_id);
}
/*
@@ -1452,7 +1432,7 @@ static void __init setup_IO_APIC_irqs(void)
void setup_IO_APIC_irq_extra(u32 gsi)
{
int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -1472,21 +1452,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return;
-
- add_pin_to_irq_node(cfg, node, apic_id, pin);
-
- if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[apic_id].apicid, pin);
- return;
- }
- set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+ set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
+ irq_polarity(idx));
- setup_ioapic_irq(apic_id, pin, irq, cfg,
- irq_trigger(idx), irq_polarity(idx));
+ io_apic_setup_irq_pin_once(irq, node, &attr);
}
/*
@@ -1518,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
* The timer IRQ doesn't have to know that behind the
* scene we may have a 8259A-master in AEOI mode ...
*/
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
/*
* Add it to the IO-APIC irq-routing table:
@@ -1625,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void)
for_each_active_irq(irq) {
struct irq_pin_list *entry;
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -2391,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg)
void irq_force_complete_move(int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
if (!cfg)
return;
@@ -2405,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { }
static void ack_apic_edge(struct irq_data *data)
{
irq_complete_move(data->chip_data);
- move_native_irq(data->irq);
+ irq_move_irq(data);
ack_APIC_irq();
}
@@ -2462,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data)
irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irqd_is_setaffinity_pending(data))) {
do_unmask_irq = 1;
mask_ioapic(cfg);
}
@@ -2551,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data)
* and you can go talk to the chipset vendor about it.
*/
if (!io_apic_level_ack_pending(cfg))
- move_masked_irq(irq);
+ irq_move_masked_irq(data);
unmask_ioapic(cfg);
}
}
@@ -2614,7 +2584,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for_each_active_irq(irq) {
- cfg = get_irq_chip_data(irq);
+ cfg = irq_get_chip_data(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
@@ -2625,7 +2595,7 @@ static inline void init_IO_APIC_traps(void)
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
- set_irq_chip(irq, &no_irq_chip);
+ irq_set_chip(irq, &no_irq_chip);
}
}
}
@@ -2665,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = {
static void lapic_register_intr(int irq)
{
irq_clear_status_flags(irq, IRQ_LEVEL);
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+ irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -2749,7 +2719,7 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_cfg *cfg = get_irq_chip_data(0);
+ struct irq_cfg *cfg = irq_get_chip_data(0);
int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
@@ -3060,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node)
raw_spin_unlock_irqrestore(&vector_lock, flags);
if (ret) {
- set_irq_chip_data(irq, cfg);
+ irq_set_chip_data(irq, cfg);
irq_clear_status_flags(irq, IRQ_NOREQUEST);
} else {
free_irq_at(irq, cfg);
@@ -3085,7 +3055,7 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long flags;
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
@@ -3119,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(cfg)) {
struct irte irte;
int ir_index;
u16 sub_handle;
@@ -3291,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
+ struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
int ret;
@@ -3298,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
if (ret < 0)
return ret;
- set_irq_msi(irq, msidesc);
+ irq_set_msi_desc(irq, msidesc);
write_msi_msg(irq, &msg);
- if (irq_remapped(get_irq_chip_data(irq))) {
+ if (irq_remapped(irq_get_chip_data(irq))) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
- } else
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+ chip = &msi_ir_chip;
+ }
+
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
@@ -3423,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq)
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
+ irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+ "edge");
return 0;
}
#endif
@@ -3482,6 +3454,7 @@ static struct irq_chip hpet_msi_type = {
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
+ struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
@@ -3501,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
if (ret < 0)
return ret;
- hpet_msi_write(get_irq_data(irq), &msg);
+ hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(get_irq_chip_data(irq)))
- set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
- handle_edge_irq, "edge");
- else
- set_irq_chip_and_handler_name(irq, &hpet_msi_type,
- handle_edge_irq, "edge");
+ if (irq_remapped(irq_get_chip_data(irq)))
+ chip = &ir_hpet_msi_type;
+ irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
}
#endif
@@ -3596,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
write_ht_irq_msg(irq, &msg);
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+ irq_set_chip_and_handler_name(irq, &ht_irq_chip,
handle_edge_irq, "edge");
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3605,7 +3575,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
}
#endif /* CONFIG_HT_IRQ */
-int __init io_apic_get_redir_entries (int ioapic)
+int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ int ret;
+
+ if (!cfg)
+ return -EINVAL;
+ ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+ if (!ret)
+ setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
+ attr->trigger, attr->polarity);
+ return ret;
+}
+
+static int io_apic_setup_irq_pin_once(unsigned int irq, int node,
+ struct io_apic_irq_attr *attr)
+{
+ unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
+ int ret;
+
+ /* Avoid redundant programming */
+ if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[id].apicid, pin);
+ return 0;
+ }
+ ret = io_apic_setup_irq_pin(irq, node, attr);
+ if (!ret)
+ set_bit(pin, mp_ioapic_routing[id].pin_programmed);
+ return ret;
+}
+
+static int __init io_apic_get_redir_entries(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3659,96 +3662,24 @@ int __init arch_probe_nr_irqs(void)
}
#endif
-static int __io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
{
- struct irq_cfg *cfg;
int node;
- int ioapic, pin;
- int trigger, polarity;
- ioapic = irq_attr->ioapic;
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
+ irq_attr->ioapic);
return -EINVAL;
}
- if (dev)
- node = dev_to_node(dev);
- else
- node = cpu_to_node(0);
-
- cfg = alloc_irq_and_cfg_at(irq, node);
- if (!cfg)
- return 0;
-
- pin = irq_attr->ioapic_pin;
- trigger = irq_attr->trigger;
- polarity = irq_attr->polarity;
+ node = dev ? dev_to_node(dev) : cpu_to_node(0);
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= legacy_pic->nr_legacy_irqs) {
- if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
- printk(KERN_INFO "can not add pin %d for irq %d\n",
- pin, irq);
- return 0;
- }
- }
-
- setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
-
- return 0;
+ return io_apic_setup_irq_pin_once(irq, node, irq_attr);
}
-int io_apic_set_pci_routing(struct device *dev, int irq,
- struct io_apic_irq_attr *irq_attr)
-{
- int ioapic, pin;
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- ioapic = irq_attr->ioapic;
- pin = irq_attr->ioapic_pin;
- if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mp_ioapics[ioapic].apicid, pin);
- return 0;
- }
- set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
-
- return __io_apic_set_pci_routing(dev, irq, irq_attr);
-}
-
-u8 __init io_apic_unique_id(u8 id)
-{
#ifdef CONFIG_X86_32
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return io_apic_get_unique_id(nr_ioapics, id);
- else
- return id;
-#else
- int i;
- DECLARE_BITMAP(used, 256);
-
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-#endif
-}
-
-#ifdef CONFIG_X86_32
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3821,9 +3752,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
return apic_id;
}
+
+static u8 __init io_apic_unique_id(u8 id)
+{
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+}
+#else
+static u8 __init io_apic_unique_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
#endif
-int __init io_apic_get_version(int ioapic)
+static int __init io_apic_get_version(int ioapic)
{
union IO_APIC_reg_01 reg_01;
unsigned long flags;
@@ -3868,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
- struct irq_desc *desc;
const struct cpumask *mask;
+ struct irq_data *idata;
if (skip_ioapic_setup == 1)
return;
@@ -3884,21 +3839,20 @@ void __init setup_ioapic_dest(void)
if ((ioapic > 0) && (irq > 16))
continue;
- desc = irq_to_desc(irq);
+ idata = irq_get_irq_data(irq);
/*
* Honour affinities which have been set in early boot
*/
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->irq_data.affinity;
+ if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+ mask = idata->affinity;
else
mask = apic->target_cpus();
if (intr_remapping_enabled)
- ir_ioapic_set_affinity(&desc->irq_data, mask, false);
+ ir_ioapic_set_affinity(idata, mask, false);
else
- ioapic_set_affinity(&desc->irq_data, mask, false);
+ ioapic_set_affinity(idata, mask, false);
}
}
@@ -4026,7 +3980,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
return gsi - mp_gsi_routing[ioapic].gsi_base;
}
-static int bad_ioapic(unsigned long address)
+static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
@@ -4086,20 +4040,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
- struct irq_cfg *cfg;
+ struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
physid_set_mask_of_physid(boot_cpu_physical_apicid,
&phys_cpu_present_map);
#endif
- /* Make sure the irq descriptor is set up */
- cfg = alloc_irq_and_cfg_at(0, 0);
-
setup_local_APIC();
- add_pin_to_irq_node(cfg, 0, 0, 0);
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
-
- setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
+ io_apic_setup_irq_pin(0, 0, &attr);
+ irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+ "edge");
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+#ifdef CONFIG_X86_32
+
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector)
{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
local_irq_restore(flags);
}
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
if (query_cpu == this_cpu)
continue;
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
}
local_irq_restore(flags);
}
-#ifdef CONFIG_X86_32
-
/*
* This is only used on smaller machines.
*/
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9f..6273eee5134b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
return physids_promote(0xFUL, retmap);
}
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-}
-
/*
* Supporting over 60 cpus on NUMA-Q requires a locality-dependent
* cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
+static int numaq_numa_cpu_node(int cpu)
+{
+ int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (logical_apicid != BAD_APICID)
+ return numaq_apicid_to_node(logical_apicid);
+ return NUMA_NO_NODE;
+}
+
static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
.setup_apic_routing = numaq_setup_apic_routing,
.multi_timer_check = numaq_multi_timer_check,
- .apicid_to_node = numaq_apicid_to_node,
- .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
.cpu_present_to_apicid = numaq_cpu_present_to_apicid,
.apicid_to_cpu_present = numaq_apicid_to_cpu_present,
.setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = numaq_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..fc84c7b61108 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
apic->setup_apic_routing();
}
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
static void setup_apic_flat_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
.multi_timer_check = NULL,
- .apicid_to_node = default_apicid_to_node,
- .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..e4b8059b414a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
return 1;
}
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
{
- unsigned long val, id;
int count = 0;
- u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
u8 my_cluster = APIC_CLUSTER(my_id);
#ifdef CONFIG_SMP
u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = cpu_2_logical_apicid[i];
+ lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
++count;
}
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- id = my_cluster | (1UL << count);
+ return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ unsigned long val;
+
apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
nr_ioapics);
}
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static int summit_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = summit_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = summit_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
.setup_apic_routing = summit_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = summit_apicid_to_node,
- .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
.cpu_present_to_apicid = summit_cpu_present_to_apicid,
.apicid_to_cpu_present = summit_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = summit_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..90949bbd566d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..c7e6d6645bf4 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b8850..3c289281394c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index b929108eb58f..9079926a5b18 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/acpi.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -2321,12 +2322,11 @@ static int __init apm_init(void)
apm_info.disabled = 1;
return -ENODEV;
}
- if (pm_flags & PM_ACPI) {
+ if (!acpi_disabled) {
printk(KERN_NOTICE "apm: overridden by ACPI.\n");
apm_info.disabled = 1;
return -ENODEV;
}
- pm_flags |= PM_APM;
/*
* Set up the long jump entry point to the APM BIOS, which is called
@@ -2418,7 +2418,6 @@ static void __exit apm_exit(void)
kthread_stop(kapmd_task);
kapmd_task = NULL;
}
- pm_flags &= ~PM_APM;
}
module_init(apm_init);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f47..4f13fafc5264 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,70 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/crypto.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/sigframe.h>
+#include <asm/bootparam.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xen.h>
+#endif
+
#ifdef CONFIG_X86_32
# include "asm-offsets_32.c"
#else
# include "asm-offsets_64.c"
#endif
+
+/*
+ * Offsets emitted regardless of whether asm-offsets_32.c or
+ * asm-offsets_64.c was included above: members of struct thread_info,
+ * struct crypto_tfm, struct pbe and struct boot_params, plus the
+ * paravirt and Xen structures when CONFIG_PARAVIRT / CONFIG_XEN are set.
+ */
+void common(void) {
+ BLANK();
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_PARAVIRT
+ /* pv_info, paravirt_patch_template and the pv_{irq,cpu,mmu}_ops tables */
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+#ifdef CONFIG_XEN
+ /* event-channel upcall fields of struct vcpu_info */
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+ /* struct boot_params: scratch plus selected setup-header (hdr) fields */
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37a..c29d631af6fc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include <asm/sigframe.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/bootparam.h>
-#include <asm/elf.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, desc_ptr, size);
- OFFSET(GDS_address, desc_ptr, address);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
- DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
-
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
-#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
-#endif
-
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#endif
-
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
-
- BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd965..e72a1194af22 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
#include <asm/ia32.h>
-#include <asm/bootparam.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
-
-#include <asm/sigframe.h>
#define __NO_STUBS 1
#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
int main(void)
{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
-#ifdef CONFIG_IA32_EMULATION
- ENTRY(sysenter_return);
-#endif
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ BLANK();
#endif
-
#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ BLANK();
+
+#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
ENTRY(ip);
BLANK();
#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
+
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
-#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+
+#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(bx);
ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
ENTRY(flags);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+
+#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
ENTRY(cr0);
ENTRY(cr2);
ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
- BLANK();
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#undef ENTRY
-#endif
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
return 0;
}
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 13a389179514..452932d34730 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -106,8 +106,8 @@ void __init setup_bios_corruption_check(void)
addr += size;
}
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
- num_scan_areas);
+ if (num_scan_areas)
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n", num_scan_areas);
}
@@ -143,12 +143,12 @@ static void check_corruption(struct work_struct *dummy)
{
check_for_bios_corruption();
schedule_delayed_work(&bios_check_work,
- round_jiffies_relative(corruption_check_period*HZ));
+ round_jiffies_relative(corruption_check_period*HZ));
}
static int start_periodic_check_for_corruption(void)
{
- if (!memory_corruption_check || corruption_check_period == 0)
+ if (!num_scan_areas || !memory_corruption_check || corruption_check_period == 0)
return 0;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..f771ab6b49e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
#endif
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
+/*
+ * Works around a broken NUMA config.  See the comment in
+ * srat_detect_node().
+ */
static int __cpuinit nearby_node(int apicid)
{
int i, node;
for (i = apicid - 1; i >= 0; i--) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes;
+ u32 nodes, cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
+ cores_per_cu += ((ebx >> 8) & 3);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
+ u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
+ cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
- /* core id to be in range from 0 to (cores_per_node - 1) */
- c->cpu_core_id = c->cpu_core_id % cores_per_node;
+ /* core id has to be in the [0 .. cores_per_node - 1] range */
+ c->cpu_core_id %= cores_per_node;
+ c->compute_unit_id %= cus_per_node;
}
}
#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
int cpu = smp_processor_id();
int node;
unsigned apicid = c->apicid;
- node = per_cpu(cpu_llc_id, cpu);
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
-
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs
+ * which the K8 northbridge parsing fills in. Assume
+ * they are all increased by a constant offset, but in
+ * the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming a certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through the CPU mapping may alter the outcome, access
+ * __apicid_to_node[] directly.
+ */
int ht_nodeid = c->initial_apicid;
if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
+ __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
/* Pick a nearby node */
if (!node_online(node))
node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..e2ced0074a45 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -675,7 +675,7 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
#endif
@@ -687,7 +687,7 @@ void __init early_cpu_init(void)
cpu_devs[count] = cpudev;
count++;
-#ifdef PROCESSOR_SELECT
+#ifdef CONFIG_PROCESSOR_SELECT
{
unsigned int j;
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
select_idle_routine(c);
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index bd1cac747f67..52c93648e492 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -158,9 +158,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x06) {
if (cpu_has(c, X86_FEATURE_EST))
- printk(KERN_WARNING PFX "Warning: EST-capable CPU "
- "detected. The acpi-cpufreq module offers "
- "voltage scaling in addition of frequency "
+ printk_once(KERN_WARNING PFX "Warning: EST-capable "
+ "CPU detected. The acpi-cpufreq module offers "
+ "voltage scaling in addition to frequency "
"scaling. You should use that instead of "
"p4-clockmod, if possible.\n");
switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index 4f6f679f2799..4a5a42b842ad 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -195,7 +195,7 @@ static unsigned int pcc_get_freq(unsigned int cpu)
cmd_incomplete:
iowrite16(0, &pcch_hdr->status);
spin_unlock(&pcc_lock);
- return -EINVAL;
+ return 0;
}
static int pcc_cpufreq_target(struct cpufreq_policy *policy,
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 35c7e65e59be..c567dec854f6 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1537,6 +1537,7 @@ static struct notifier_block cpb_nb = {
static int __cpuinit powernowk8_init(void)
{
unsigned int i, supported_cpus = 0, cpu;
+ int rv;
for_each_online_cpu(i) {
int rc;
@@ -1555,14 +1556,14 @@ static int __cpuinit powernowk8_init(void)
cpb_capable = true;
- register_cpu_notifier(&cpb_nb);
-
msrs = msrs_alloc();
if (!msrs) {
printk(KERN_ERR "%s: Error allocating msrs!\n", __func__);
return -ENOMEM;
}
+ register_cpu_notifier(&cpb_nb);
+
rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs);
for_each_cpu(cpu, cpu_online_mask) {
@@ -1574,7 +1575,13 @@ static int __cpuinit powernowk8_init(void)
(cpb_enabled ? "on" : "off"));
}
- return cpufreq_register_driver(&cpufreq_amd64_driver);
+ rv = cpufreq_register_driver(&cpufreq_amd64_driver);
+ if (rv < 0 && boot_cpu_has(X86_FEATURE_CPB)) {
+ unregister_cpu_notifier(&cpb_nb);
+ msrs_free(msrs);
+ msrs = NULL;
+ }
+ return rv;
}
/* driver entry point for term */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
unsigned node;
int cpu = smp_processor_id();
- int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
- node = apicid_to_node[apicid];
+ node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE || !node_online(node)) {
/* reuse the value from init_cpu_to_node() */
node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
struct _cache_attr {
struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+ ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
+ unsigned int);
};
#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
#define SHOW_CACHE_DISABLE(slot) \
static ssize_t \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return show_cache_disable(this_leaf, buf, slot); \
}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count, \
+ unsigned int cpu) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
+/*
+ * Read handler for the "subcaches" attribute: prints the value returned
+ * by amd_get_subcaches() for @cpu in hex, followed by a newline.
+ *
+ * Returns -EINVAL when the leaf has no l3 descriptor or the northbridge
+ * does not advertise AMD_NB_L3_PARTITIONING; otherwise the sprintf()
+ * result.
+ */
+static ssize_t
+show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+{
+	if (!this_leaf->l3)
+		return -EINVAL;
+	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		return -EINVAL;
+
+	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+/*
+ * Write handler for the "subcaches" attribute.  @buf is parsed as a
+ * base-16 number and passed to amd_set_subcaches() for @cpu.
+ *
+ * Returns @count on success, -EPERM if the caller lacks CAP_SYS_ADMIN,
+ * and -EINVAL if L3 partitioning is unavailable for this leaf, the input
+ * does not parse, or amd_set_subcaches() returns non-zero.
+ */
+static ssize_t
+store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
+		unsigned int cpu)
+{
+	unsigned long parsed;
+
+	/* The privilege check comes first so unprivileged writers always see -EPERM. */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!this_leaf->l3)
+		return -EINVAL;
+	if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+		return -EINVAL;
+
+	if (strict_strtoul(buf, 16, &parsed) < 0)
+		return -EINVAL;
+
+	if (amd_set_subcaches(cpu, parsed) != 0)
+		return -EINVAL;
+
+	return count;
+}
+
+/* "subcaches" attribute, mode 0644, served by show_subcaches()/store_subcaches(). */
+static struct _cache_attr subcaches =
+ __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+
#else /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
__cpuinit cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
+static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
return n;
}
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 0, buf);
}
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
switch (this_leaf->eax.split.type) {
case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ n += 1;
+
attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
if (attrs == NULL)
return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
attrs[n++] = &cache_disable_1.attr;
}
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ attrs[n++] = &subcaches.attr;
+
return attrs;
}
#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
+ buf, this_leaf->cpu) :
0;
return ret;
}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
ret = fattr->store ?
fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ buf, count, this_leaf->cpu) :
0;
return ret;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..26604188aa49 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,7 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>
+#include <asm/smp.h>
#if 0
#undef wrmsrl
@@ -93,6 +94,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
+struct intel_percore;
+
#define MAX_LBR_ENTRIES 16
struct cpu_hw_events {
@@ -128,6 +131,13 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
/*
+ * Intel percore register state.
+ * Coordinate shared resources between HT threads.
+ */
+ int percore_used; /* Used by this CPU? */
+ struct intel_percore *per_core;
+
+ /*
* AMD specific bits
*/
struct amd_nb *amd_nb;
@@ -166,8 +176,10 @@ struct cpu_hw_events {
/*
* Constraint on the Event code + UMask
*/
-#define PEBS_EVENT_CONSTRAINT(c, n) \
+#define INTEL_UEVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+#define PEBS_EVENT_CONSTRAINT(c, n) \
+ INTEL_UEVENT_CONSTRAINT(c, n)
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -175,6 +187,28 @@ struct cpu_hw_events {
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ *
+ * x86_pmu_extra_regs() walks a table of these; an entry whose msr is 0
+ * terminates the table.
+ */
+struct extra_reg {
+ unsigned int event;	/* compared against (config & config_mask) */
+ unsigned int msr;	/* copied to hw.extra_reg on a match; 0 ends the table */
+ u64 config_mask;	/* bits of the event config used for the comparison */
+ u64 valid_mask;	/* attr.config1 bits outside this mask are rejected */
+};
+
+/* Designated initializer for one struct extra_reg table entry. */
+#define EVENT_EXTRA_REG(e, ms, m, vm) { \
+ .event = (e), \
+ .msr = (ms), \
+ .config_mask = (m), \
+ .valid_mask = (vm), \
+ }
+/* As EVENT_EXTRA_REG, with config_mask fixed to ARCH_PERFMON_EVENTSEL_EVENT. */
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
+ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+/* All-zero entry; its msr of 0 is what stops the walk in x86_pmu_extra_regs(). */
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
union perf_capabilities {
struct {
u64 lbr_format : 6;
@@ -219,6 +253,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
+ struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;
@@ -247,6 +282,11 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+
+ /*
+ * Extra registers for events
+ */
+ struct extra_reg *extra_regs;
};
static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
/*
* Propagate event elapsed time into the generic event.
@@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event)
*/
again:
prev_raw_count = local64_read(&hwc->prev_count);
- rdmsrl(hwc->event_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base, new_raw_count);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -321,6 +365,49 @@ again:
return new_raw_count;
}
+/*
+ * Turn a counter index into the offset added to the eventsel/perfctr
+ * base: the index is doubled when X86_FEATURE_PERFCTR_CORE is set and
+ * used unchanged otherwise.  The feature bit is tested here so that this
+ * can later be implemented with ALTERNATIVE().
+ */
+static inline int x86_pmu_addr_offset(int index)
+{
+	return boot_cpu_has(X86_FEATURE_PERFCTR_CORE) ? index << 1 : index;
+}
+
+/* Event-select MSR address for counter @index: x86_pmu.eventsel plus the index offset. */
+static inline unsigned int x86_pmu_config_addr(int index)
+{
+	int offset = x86_pmu_addr_offset(index);
+
+	return x86_pmu.eventsel + offset;
+}
+
+/* Counter MSR address for counter @index: x86_pmu.perfctr plus the index offset. */
+static inline unsigned int x86_pmu_event_addr(int index)
+{
+	int offset = x86_pmu_addr_offset(index);
+
+	return x86_pmu.perfctr + offset;
+}
+
+/*
+ * Look up and validate the extra register, if any, that goes with @config.
+ *
+ * event->hw.extra_reg and event->hw.extra_config are always reset to 0
+ * first.  If x86_pmu.extra_regs has an entry whose event code equals
+ * (@config & config_mask), event->attr.config1 is checked against that
+ * entry's valid_mask and, when acceptable, recorded together with the
+ * entry's MSR.
+ *
+ * Returns 0 when no table exists, no entry matches, or the match was
+ * recorded; -EINVAL when config1 has bits outside valid_mask.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+	struct extra_reg *entry = x86_pmu.extra_regs;
+
+	event->hw.extra_reg = 0;
+	event->hw.extra_config = 0;
+
+	if (!entry)
+		return 0;
+
+	/* Advance to the first matching entry or to the msr == 0 terminator. */
+	while (entry->msr && entry->event != (config & entry->config_mask))
+		entry++;
+
+	if (!entry->msr)
+		return 0;
+
+	if (event->attr.config1 & ~entry->valid_mask)
+		return -EINVAL;
+
+	event->hw.extra_reg = entry->msr;
+	event->hw.extra_config = event->attr.config1;
+	return 0;
+}
+
static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);
@@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
+ if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
goto perfctr_fail;
}
for (i = 0; i < x86_pmu.num_counters; i++) {
- if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
+ if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
goto eventsel_fail;
}
@@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void)
eventsel_fail:
for (i--; i >= 0; i--)
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_evntsel_nmi(x86_pmu_config_addr(i));
i = x86_pmu.num_counters;
perfctr_fail:
for (i--; i >= 0; i--)
- release_perfctr_nmi(x86_pmu.perfctr + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
return false;
}
@@ -360,8 +447,8 @@ static void release_pmc_hardware(void)
int i;
for (i = 0; i < x86_pmu.num_counters; i++) {
- release_perfctr_nmi(x86_pmu.perfctr + i);
- release_evntsel_nmi(x86_pmu.eventsel + i);
+ release_perfctr_nmi(x86_pmu_event_addr(i));
+ release_evntsel_nmi(x86_pmu_config_addr(i));
}
}
@@ -382,7 +469,7 @@ static bool check_hw_exists(void)
* complain and bail.
*/
for (i = 0; i < x86_pmu.num_counters; i++) {
- reg = x86_pmu.eventsel + i;
+ reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
if (ret)
goto msr_fail;
@@ -407,8 +494,8 @@ static bool check_hw_exists(void)
* that don't trap on the MSR access and always return 0s.
*/
val = 0xabcdUL;
- ret = checking_wrmsrl(x86_pmu.perfctr, val);
- ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+ ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
+ ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
if (ret || val != val_new)
goto msr_fail;
@@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void)
}
static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
{
+ struct perf_event_attr *attr = &event->attr;
unsigned int cache_type, cache_op, cache_result;
u64 config, val;
@@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
return -EINVAL;
hwc->config |= val;
-
- return 0;
+ attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+ return x86_pmu_extra_regs(val, event);
}
static int x86_setup_perfctr(struct perf_event *event)
@@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event)
}
if (attr->type == PERF_TYPE_RAW)
- return 0;
+ return x86_pmu_extra_regs(event->attr.config, event);
if (attr->type == PERF_TYPE_HW_CACHE)
- return set_ext_hw_attr(hwc, attr);
+ return set_ext_hw_attr(hwc, event);
if (attr->config >= x86_pmu.max_events)
return -EINVAL;
@@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void)
if (!test_bit(idx, cpuc->active_mask))
continue;
- rdmsrl(x86_pmu.eventsel + idx, val);
+ rdmsrl(x86_pmu_config_addr(idx), val);
if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
continue;
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ wrmsrl(x86_pmu_config_addr(idx), val);
}
}
@@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu)
x86_pmu.disable_all();
}
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+ u64 enable_mask)
+{
+ if (hwc->extra_reg)
+ wrmsrl(hwc->extra_reg, hwc->extra_config);
+ wrmsrl(hwc->config_base, hwc->config | enable_mask);
+}
+
static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_event *event = cpuc->events[idx];
- u64 val;
+ struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
if (!test_bit(idx, cpuc->active_mask))
continue;
- val = event->hw.config;
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsrl(x86_pmu.eventsel + idx, val);
+ __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
}
@@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
hwc->event_base = 0;
} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
- /*
- * We set it so that event_base + idx in wrmsr/rdmsr maps to
- * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
- */
- hwc->event_base =
- MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
+ /*
+ * event_base is now used as-is (no "+ idx" at the wrmsr/rdmsr
+ * sites), so it must already name this event's own counter,
+ * i.e. one of MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
+ */
+ hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
+ (hwc->idx - X86_PMC_IDX_FIXED);
} else {
- hwc->config_base = x86_pmu.eventsel;
- hwc->event_base = x86_pmu.perfctr;
+ hwc->config_base = x86_pmu_config_addr(hwc->idx);
+ hwc->event_base = x86_pmu_event_addr(hwc->idx);
}
}
@@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu)
x86_pmu.enable_all(added);
}
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
- u64 enable_mask)
-{
- wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
-}
-
static inline void x86_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+ wrmsrl(hwc->config_base, hwc->config);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event)
*/
local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Due to erratum on certan cpu we need
@@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event)
* is updated properly
*/
if (x86_pmu.perfctr_second_write) {
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base,
(u64)(-left) & x86_pmu.cntval_mask);
}
@@ -1113,8 +1195,8 @@ void perf_event_print_debug(void)
pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
- rdmsrl(x86_pmu.perfctr + idx, pmc_count);
+ rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
+ rdmsrl(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
@@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1608,7 +1690,7 @@ out:
return ret;
}
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67e2202a6039..461f62bbd774 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event)
/*
* AMD64 events are detected based on their event codes.
*/
+static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
+{
+ return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
+}
+
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
return (hwc->config & 0xe0) == 0xe0;
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
+/* AMD Family 15h */
+
+#define AMD_EVENT_TYPE_MASK 0x000000F0ULL
+
+#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL
+#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL
+#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL
+#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL
+#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL
+#define AMD_EVENT_EX_LS 0x000000C0ULL
+#define AMD_EVENT_DE 0x000000D0ULL
+#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL
+
+/*
+ * AMD family 15h event code/PMC mappings:
+ *
+ * type = event_code & 0x0F0:
+ *
+ * 0x000 FP PERF_CTL[5:3]
+ * 0x010 FP PERF_CTL[5:3]
+ * 0x020 LS PERF_CTL[5:0]
+ * 0x030 LS PERF_CTL[5:0]
+ * 0x040 DC PERF_CTL[5:0]
+ * 0x050 DC PERF_CTL[5:0]
+ * 0x060 CU PERF_CTL[2:0]
+ * 0x070 CU PERF_CTL[2:0]
+ * 0x080 IC/DE PERF_CTL[2:0]
+ * 0x090 IC/DE PERF_CTL[2:0]
+ * 0x0A0 ---
+ * 0x0B0 ---
+ * 0x0C0 EX/LS PERF_CTL[5:0]
+ * 0x0D0 DE PERF_CTL[2:0]
+ * 0x0E0 NB NB_PERF_CTL[3:0]
+ * 0x0F0 NB NB_PERF_CTL[3:0]
+ *
+ * Exceptions:
+ *
+ * 0x003 FP PERF_CTL[3]
+ * 0x00B FP PERF_CTL[3]
+ * 0x00D FP PERF_CTL[3]
+ * 0x023 DE PERF_CTL[2:0]
+ * 0x02D LS PERF_CTL[3]
+ * 0x02E LS PERF_CTL[3,0]
+ * 0x043 CU PERF_CTL[2:0]
+ * 0x045 CU PERF_CTL[2:0]
+ * 0x046 CU PERF_CTL[2:0]
+ * 0x054 CU PERF_CTL[2:0]
+ * 0x055 CU PERF_CTL[2:0]
+ * 0x08F IC PERF_CTL[0]
+ * 0x187 DE PERF_CTL[0]
+ * 0x188 DE PERF_CTL[0]
+ * 0x0DB EX PERF_CTL[5:0]
+ * 0x0DC LS PERF_CTL[5:0]
+ * 0x0DD LS PERF_CTL[5:0]
+ * 0x0DE LS PERF_CTL[5:0]
+ * 0x0DF LS PERF_CTL[5:0]
+ * 0x1D6 EX PERF_CTL[5:0]
+ * 0x1D8 EX PERF_CTL[5:0]
+ */
+
+static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0);
+static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
+static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
+static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+
+static struct event_constraint *
+amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ unsigned int event_code = amd_get_event_code(&event->hw);
+
+ switch (event_code & AMD_EVENT_TYPE_MASK) {
+ case AMD_EVENT_FP:
+ switch (event_code) {
+ case 0x003:
+ case 0x00B:
+ case 0x00D:
+ return &amd_f15_PMC3;
+ default:
+ return &amd_f15_PMC53;
+ }
+ case AMD_EVENT_LS:
+ case AMD_EVENT_DC:
+ case AMD_EVENT_EX_LS:
+ switch (event_code) {
+ case 0x023:
+ case 0x043:
+ case 0x045:
+ case 0x046:
+ case 0x054:
+ case 0x055:
+ return &amd_f15_PMC20;
+ case 0x02D:
+ return &amd_f15_PMC3;
+ case 0x02E:
+ return &amd_f15_PMC30;
+ default:
+ return &amd_f15_PMC50;
+ }
+ case AMD_EVENT_CU:
+ case AMD_EVENT_IC_DE:
+ case AMD_EVENT_DE:
+ switch (event_code) {
+ case 0x08F:
+ case 0x187:
+ case 0x188:
+ return &amd_f15_PMC0;
+ case 0x0DB ... 0x0DF:
+ case 0x1D6:
+ case 0x1D8:
+ return &amd_f15_PMC50;
+ default:
+ return &amd_f15_PMC20;
+ }
+ case AMD_EVENT_NB:
+ /* not yet implemented */
+ return &emptyconstraint;
+ default:
+ return &emptyconstraint;
+ }
+}
+
+static __initconst const struct x86_pmu amd_pmu_f15h = {
+ .name = "AMD Family 15h",
+ .handle_irq = x86_pmu_handle_irq,
+ .disable_all = x86_pmu_disable_all,
+ .enable_all = x86_pmu_enable_all,
+ .enable = x86_pmu_enable_event,
+ .disable = x86_pmu_disable_event,
+ .hw_config = amd_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_F15H_PERF_CTL,
+ .perfctr = MSR_F15H_PERF_CTR,
+ .event_map = amd_pmu_event_map,
+ .max_events = ARRAY_SIZE(amd_perfmon_event_map),
+ .num_counters = 6,
+ .cntval_bits = 48,
+ .cntval_mask = (1ULL << 48) - 1,
+ .apic = 1,
+ /* use highest bit to detect overflow */
+ .max_period = (1ULL << 47) - 1,
+ .get_event_constraints = amd_get_event_constraints_f15h,
+ /* northbridge counters not yet implemented: */
+#if 0
+ .put_event_constraints = amd_put_event_constraints,
+
+ .cpu_prepare = amd_pmu_cpu_prepare,
+ .cpu_starting = amd_pmu_cpu_starting,
+ .cpu_dead = amd_pmu_cpu_dead,
+#endif
+};
+
static __init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
- x86_pmu = amd_pmu;
+ /*
+ * If core performance counter extensions exist, it must be
+ * family 15h, otherwise fail. See x86_pmu_addr_offset().
+ */
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ if (!cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu_f15h;
+ break;
+ default:
+ if (cpu_has_perfctr_core)
+ return -ENODEV;
+ x86_pmu = amd_pmu;
+ break;
+ }
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 008835c1d79c..8fc2b2cee1da 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,27 @@
#ifdef CONFIG_CPU_SUP_INTEL
+#define MAX_EXTRA_REGS 2
+
+/*
+ * Per register state.
+ */
+struct er_account {
+ int ref; /* reference count */
+ unsigned int extra_reg; /* extra MSR number */
+ u64 extra_config; /* extra MSR config */
+};
+
+/*
+ * Per core state
+ * This is used to coordinate shared registers for HT threads.
+ */
+struct intel_percore {
+ raw_spinlock_t lock; /* protect structure */
+ struct er_account regs[MAX_EXTRA_REGS];
+ int refcnt; /* number of threads */
+ unsigned core_id;
+};
+
/*
* Intel PerfMon, used on Core and later.
*/
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
EVENT_CONSTRAINT_END
};
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_westmere_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_snb_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
+ INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+ EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xb7, 0),
+ INTEL_EVENT_CONSTRAINT(0xbb, 0),
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_gen_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+static __initconst const u64 snb_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ /*
+ * TBD: Need Off-core Response Performance Monitoring support
+ */
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01bb,
+ /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
},
},
[ C(DTLB) ] = {
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids
},
};
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD (1 << 0)
+#define DMND_RFO (1 << 1)
+#define DMND_WB (1 << 3)
+#define PF_DATA_RD (1 << 4)
+#define PF_DATA_RFO (1 << 5)
+#define RESP_UNCORE_HIT (1 << 8)
+#define RESP_MISS (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+ [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+ },
+ }
+};
+
static __initconst const u64 nehalem_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
- [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
+ /*
+ * Use RFO, not WRITEBACK, because a write miss would typically occur
+ * on RFO.
+ */
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
- [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
- [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
},
},
[ C(DTLB) ] = {
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void)
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
- checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
+ checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
+ checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
}
for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event)
}
static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+ struct event_constraint *c;
+ struct intel_percore *pc;
+ struct er_account *era;
+ int i;
+ int free_slot;
+ int found;
+
+ if (!x86_pmu.percore_constraints || hwc->extra_alloc)
+ return NULL;
+
+ for (c = x86_pmu.percore_constraints; c->cmask; c++) {
+ if (e != c->code)
+ continue;
+
+ /*
+ * Allocate resource per core.
+ */
+ pc = cpuc->per_core;
+ if (!pc)
+ break;
+ c = &emptyconstraint;
+ raw_spin_lock(&pc->lock);
+ free_slot = -1;
+ found = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
+ /* Allow sharing same config */
+ if (hwc->extra_config == era->extra_config) {
+ era->ref++;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ /* else conflict */
+ found = 1;
+ break;
+ } else if (era->ref == 0 && free_slot == -1)
+ free_slot = i;
+ }
+ if (!found && free_slot != -1) {
+ era = &pc->regs[free_slot];
+ era->ref = 1;
+ era->extra_reg = hwc->extra_reg;
+ era->extra_config = hwc->extra_config;
+ cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
+ c = NULL;
+ }
+ raw_spin_unlock(&pc->lock);
+ return c;
+ }
+
+ return NULL;
+}
+
+static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct event_constraint *c;
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
if (c)
return c;
+ c = intel_percore_constraints(cpuc, event);
+ if (c)
+ return c;
+
return x86_get_event_constraints(cpuc, event);
}
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ struct extra_reg *er;
+ struct intel_percore *pc;
+ struct er_account *era;
+ struct hw_perf_event *hwc = &event->hw;
+ int i, allref;
+
+ if (!cpuc->percore_used)
+ return;
+
+ for (er = x86_pmu.extra_regs; er->msr; er++) {
+ if (er->event != (hwc->config & er->config_mask))
+ continue;
+
+ pc = cpuc->per_core;
+ raw_spin_lock(&pc->lock);
+ for (i = 0; i < MAX_EXTRA_REGS; i++) {
+ era = &pc->regs[i];
+ if (era->ref > 0 &&
+ era->extra_config == hwc->extra_config &&
+ era->extra_reg == er->msr) {
+ era->ref--;
+ hwc->extra_alloc = 0;
+ break;
+ }
+ }
+ allref = 0;
+ for (i = 0; i < MAX_EXTRA_REGS; i++)
+ allref += pc->regs[i].ref;
+ if (allref == 0)
+ cpuc->percore_used = 0;
+ raw_spin_unlock(&pc->lock);
+ break;
+ }
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
};
+static int intel_pmu_cpu_prepare(int cpu)
+{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+ if (!cpu_has_ht_siblings())
+ return NOTIFY_OK;
+
+ cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!cpuc->per_core)
+ return NOTIFY_BAD;
+
+ raw_spin_lock_init(&cpuc->per_core->lock);
+ cpuc->per_core->core_id = -1;
+ return NOTIFY_OK;
+}
+
static void intel_pmu_cpu_starting(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ int core_id = topology_core_id(cpu);
+ int i;
+
init_debug_store_on_cpu(cpu);
/*
* Deal with CPUs that don't clear their LBRs on power-up.
*/
intel_pmu_lbr_reset();
+
+ if (!cpu_has_ht_siblings())
+ return;
+
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+
+ if (pc && pc->core_id == core_id) {
+ kfree(cpuc->per_core);
+ cpuc->per_core = pc;
+ break;
+ }
+ }
+
+ cpuc->per_core->core_id = core_id;
+ cpuc->per_core->refcnt++;
}
static void intel_pmu_cpu_dying(int cpu)
{
+ struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+ struct intel_percore *pc = cpuc->per_core;
+
+ if (pc) {
+ if (pc->core_id == -1 || --pc->refcnt == 0)
+ kfree(pc);
+ cpuc->per_core = NULL;
+ }
+
fini_debug_store_on_cpu(cpu);
}
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = {
*/
.max_period = (1ULL << 31) - 1,
.get_event_constraints = intel_get_event_constraints,
+ .put_event_constraints = intel_put_event_constraints,
+ .cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
};
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_core();
x86_pmu.event_constraints = intel_core2_event_constraints;
+ x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
pr_cont("Core2 events, ");
break;
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void)
case 46: /* 45 nm nehalem-ex, "Beckton" */
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_nehalem_event_constraints;
+ x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
+ x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.extra_regs = intel_nehalem_extra_regs;
pr_cont("Nehalem events, ");
break;
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_atom();
x86_pmu.event_constraints = intel_gen_event_constraints;
+ x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
pr_cont("Atom events, ");
break;
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void)
case 44: /* 32 nm nehalem, "Gulftown" */
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_westmere_event_constraints;
+ x86_pmu.percore_constraints = intel_westmere_percore_constraints;
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_westmere_extra_regs;
pr_cont("Westmere events, ");
break;
+ case 42: /* SandyBridge */
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ intel_pmu_lbr_init_nhm();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_snb_pebs_events;
+ pr_cont("SandyBridge events, ");
+ break;
+
default:
/*
* default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f2b8a0..b95c66ae4a2a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void)
/*
* PEBS
*/
-
-static struct event_constraint intel_core_pebs_events[] = {
- PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
+static struct event_constraint intel_core2_pebs_event_constraints[] = {
+ PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
- PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
- PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
- PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_atom_pebs_event_constraints[] = {
+ PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
+ PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
EVENT_CONSTRAINT_END
};
-static struct event_constraint intel_nehalem_pebs_events[] = {
- PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
- PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
- PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
- PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
- PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
- PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
- PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
- PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
+static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
+ INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_pebs_event_constraints[] = {
+ INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
+ INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
+
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
+ PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
+ EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_snb_pebs_events[] = {
+ PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
+ PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
+ PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
+ PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
+ PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
+ PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
+ PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
+ PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
+ PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
+ PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
+ PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
@@ -695,20 +753,17 @@ static void intel_ds_init(void)
printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
- x86_pmu.pebs_constraints = intel_core_pebs_events;
break;
case 1:
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
break;
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
- break;
}
}
}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f7a0993c1e7c..3769ac822f96 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -764,15 +764,20 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
u64 v;
/* an official way for overflow indication */
- rdmsrl(hwc->config_base + hwc->idx, v);
+ rdmsrl(hwc->config_base, v);
if (v & P4_CCCR_OVF) {
- wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF);
return 1;
}
- /* it might be unflagged overflow */
- rdmsrl(hwc->event_base + hwc->idx, v);
- if (!(v & ARCH_P4_CNTRVAL_MASK))
+ /*
+ * In some circumstances the overflow might issue an NMI but did
+ * not set P4_CCCR_OVF bit. Because a counter holds a negative value
+ * we simply check for high bit being set, if it's cleared it means
+ * the counter has reached zero value and continued counting before
+ * real NMI signal was received:
+ */
+ if (!(v & ARCH_P4_UNFLAGGED_BIT))
return 1;
return 0;
@@ -810,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
* state we need to clear P4_CCCR_OVF, otherwise interrupt get
* asserted again and again
*/
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(u64)(p4_config_unpack_cccr(hwc->config)) &
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
}
@@ -880,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
p4_pmu_enable_pebs(hwc->config);
(void)checking_wrmsrl(escr_addr, escr_conf);
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
+ (void)checking_wrmsrl(hwc->config_base,
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 34ba07be2cda..20c097e33860 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event)
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL_ENABLE;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
+ (void)checking_wrmsrl(hwc->config_base, val);
}
static __initconst const struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d5a236615501..966512b2cacf 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
/* returns the bit offset of the performance counter register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTR)
+ return (msr - MSR_F15H_PERF_CTR) >> 1;
return msr - MSR_K7_PERFCTR0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
/* returns the bit offset of the event selection register */
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
+ if (msr >= MSR_F15H_PERF_CTL)
+ return (msr - MSR_F15H_PERF_CTL) >> 1;
return msr - MSR_K7_EVNTSEL0;
case X86_VENDOR_INTEL:
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
new file mode 100644
index 000000000000..7a8cebc9ff29
--- /dev/null
+++ b/arch/x86/kernel/devicetree.c
@@ -0,0 +1,441 @@
+/*
+ * Architecture specific OF callbacks.
+ */
+#include <linux/bootmem.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/of_pci.h>
+
+#include <asm/hpet.h>
+#include <asm/irq_controller.h>
+#include <asm/apic.h>
+#include <asm/pci_x86.h>
+
+/* Location of the device tree blob as computed by add_dtb(); 0 means none. */
+__initdata u64 initial_dtb;
+/* NOTE(review): not referenced by the code in this file - presumably the
+ * command line buffer filled in by the OF core; confirm against its users. */
+char __initdata cmd_line[COMMAND_LINE_SIZE];
+/* Registered interrupt domains; walked and modified under big_irq_lock. */
+static LIST_HEAD(irq_domains);
+static DEFINE_RAW_SPINLOCK(big_irq_lock);
+
+/* Set to 1 by dtb_ioapic_setup() once at least one IO-APIC is registered. */
+int __initdata of_ioapic;
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * Publish @ih on the irq_domains list. The insertion is performed with
+ * big_irq_lock held and interrupts saved/restored around it.
+ */
+static void add_interrupt_host(struct irq_domain *ih)
+{
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&big_irq_lock, irqflags);
+	list_add(&ih->l, &irq_domains);
+	raw_spin_unlock_irqrestore(&big_irq_lock, irqflags);
+}
+#endif
+
+/*
+ * Look up the interrupt domain registered for @controller.
+ *
+ * Walks irq_domains under big_irq_lock and returns the first entry whose
+ * ->controller is @controller, or NULL when there is none.
+ */
+static struct irq_domain *get_ih_from_node(struct device_node *controller)
+{
+	struct irq_domain *pos;
+	struct irq_domain *match = NULL;
+	unsigned long irqflags;
+
+	raw_spin_lock_irqsave(&big_irq_lock, irqflags);
+	list_for_each_entry(pos, &irq_domains, l) {
+		if (pos->controller != controller)
+			continue;
+		match = pos;
+		break;
+	}
+	raw_spin_unlock_irqrestore(&big_irq_lock, irqflags);
+
+	return match;
+}
+
+/*
+ * Translate a device tree interrupt specifier into a Linux IRQ number.
+ *
+ * @controller: node of the interrupt controller the specifier refers to
+ * @intspec:    interrupt specifier cells
+ * @intsize:    number of cells in @intspec
+ *
+ * Returns the IRQ number on success. Returns 0 when no interrupt domain is
+ * registered for @controller or when the domain's xlate() callback fails.
+ */
+unsigned int irq_create_of_mapping(struct device_node *controller,
+		const u32 *intspec, unsigned int intsize)
+{
+	struct irq_domain *ih;
+	u32 virq, type;
+
+	ih = get_ih_from_node(controller);
+	if (!ih)
+		return 0;
+	/*
+	 * The return type is unsigned and callers only test for 0, so a
+	 * negative errno from xlate() must not be passed through: it would
+	 * look like a (huge) valid IRQ number.
+	 */
+	if (ih->xlate(ih, intspec, intsize, &virq, &type))
+		return 0;
+	if (type == IRQ_TYPE_NONE)
+		return virq;
+	/* set the trigger type only if it is different from the current one */
+	if (type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
+		set_irq_type(virq, type);
+	return virq;
+}
+EXPORT_SYMBOL_GPL(irq_create_of_mapping);
+
+/*
+ * Convert an I/O address to the value handed to inX() / outX().
+ *
+ * The ioport address can be used directly, so this is an identity mapping;
+ * any address that does not fit in 16 bits triggers BUG_ON().
+ */
+unsigned long pci_address_to_pio(phys_addr_t address)
+{
+	BUG_ON(address > 0xffff);
+	return (unsigned long)address;
+}
+EXPORT_SYMBOL_GPL(pci_address_to_pio);
+
+/*
+ * Arch hook for scanning the /chosen node. Unconditionally BUG()s:
+ * presumably x86 never reaches this OF-core callback - confirm.
+ */
+void __init early_init_dt_scan_chosen_arch(unsigned long node)
+{
+ BUG();
+}
+
+/*
+ * Arch hook for adding a memory range found in the device tree.
+ * Unconditionally BUG()s: presumably x86 memory is never described through
+ * this path - confirm.
+ */
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+ BUG();
+}
+
+/*
+ * Allocate @size bytes aligned to @align from bootmem, passing
+ * __pa(MAX_DMA_ADDRESS) as the third __alloc_bootmem() argument.
+ */
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+ return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
+}
+
+/*
+ * Remember where the device tree blob lives: @data advanced by the offset
+ * of the 'data' member inside struct setup_data.
+ * NOTE(review): @data is presumably the physical address of a setup_data
+ * node - confirm against the caller.
+ */
+void __init add_dtb(u64 data)
+{
+ initial_dtb = data + offsetof(struct setup_data, data);
+}
+
+/*
+ * CE4100 ids. Will be moved to machine_device_initcall() once we have it.
+ *
+ * Match table handed to of_platform_bus_probe() by add_bus_probe(); the
+ * empty entry terminates the list.
+ */
+static struct of_device_id __initdata ce4100_ids[] = {
+ { .compatible = "intel,ce4100-cp", },
+ { .compatible = "isa", },
+ { .compatible = "pci", },
+ {},
+};
+
+/*
+ * Initcall: when a device tree has been populated, probe the buses listed
+ * in ce4100_ids and return the probe result; otherwise do nothing and
+ * return 0.
+ */
+static int __init add_bus_probe(void)
+{
+	if (of_have_populated_dt())
+		return of_platform_bus_probe(NULL, ce4100_ids, NULL);
+
+	return 0;
+}
+module_init(add_bus_probe);
+
+#ifdef CONFIG_PCI
+/*
+ * pcibios_enable_irq hook: resolve the interrupt of @dev through the
+ * device tree and store it in dev->irq.
+ *
+ * Returns 0 on success or when the interrupt pin register reads 0, the
+ * error from the config read or of_irq_map_pci() if either fails, and
+ * -EINVAL when no IRQ mapping could be created.
+ */
+static int x86_of_pci_irq_enable(struct pci_dev *dev)
+{
+	struct of_irq irq_spec;
+	u32 linux_irq;
+	u8 int_pin;
+	int err;
+
+	err = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &int_pin);
+	if (err)
+		return err;
+	/* nothing to map when the pin register is zero */
+	if (int_pin == 0)
+		return 0;
+
+	err = of_irq_map_pci(dev, &irq_spec);
+	if (err)
+		return err;
+
+	linux_irq = irq_create_of_mapping(irq_spec.controller,
+					  irq_spec.specifier, irq_spec.size);
+	if (!linux_irq)
+		return -EINVAL;
+
+	dev->irq = linux_irq;
+	return 0;
+}
+
+/* pcibios_disable_irq hook installed by x86_of_pci_init(); does nothing. */
+static void x86_of_pci_irq_disable(struct pci_dev *dev)
+{
+}
+
+/*
+ * Hook the OF based PCI interrupt routing into the PCI core and attach
+ * device tree nodes to the PCI buses and devices they describe.
+ *
+ * Nodes without a "bus-range" property, buses that cannot be found and
+ * children without a "reg" property or without a matching PCI device are
+ * skipped.
+ */
+void __cpuinit x86_of_pci_init(void)
+{
+ struct device_node *np;
+
+ pcibios_enable_irq = x86_of_pci_irq_enable;
+ pcibios_disable_irq = x86_of_pci_irq_disable;
+
+ for_each_node_by_type(np, "pci") {
+ const void *prop;
+ struct pci_bus *bus;
+ unsigned int bus_min;
+ struct device_node *child;
+
+ prop = of_get_property(np, "bus-range", NULL);
+ if (!prop)
+ continue;
+ /* only the first cell of "bus-range" is used */
+ bus_min = be32_to_cpup(prop);
+
+ /* the bus is looked up in PCI domain 0 */
+ bus = pci_find_bus(0, bus_min);
+ if (!bus) {
+ printk(KERN_ERR "Can't find a node for bus %s.\n",
+ np->full_name);
+ continue;
+ }
+
+ /* attach the node to the bridge device if there is one, else to the bus */
+ if (bus->self)
+ bus->self->dev.of_node = np;
+ else
+ bus->dev.of_node = np;
+
+ for_each_child_of_node(np, child) {
+ struct pci_dev *dev;
+ u32 devfn;
+
+ prop = of_get_property(child, "reg", NULL);
+ if (!prop)
+ continue;
+
+ /* devfn is taken from bits 8-15 of the first "reg" cell */
+ devfn = (be32_to_cpup(prop) >> 8) & 0xff;
+ dev = pci_get_slot(bus, devfn);
+ if (!dev)
+ continue;
+ dev->dev.of_node = child;
+ /* release the device obtained from pci_get_slot() */
+ pci_dev_put(dev);
+ }
+ }
+}
+#endif
+
+/*
+ * Take the HPET base address from the "intel,ce4100-hpet" node, if any.
+ *
+ * Does nothing without CONFIG_HPET_TIMER or when the node is absent; warns
+ * and leaves hpet_address untouched when the node has no usable address.
+ */
+static void __init dtb_setup_hpet(void)
+{
+#ifdef CONFIG_HPET_TIMER
+	struct device_node *hpet_np;
+	struct resource res;
+
+	hpet_np = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet");
+	if (!hpet_np)
+		return;
+
+	if (of_address_to_resource(hpet_np, 0, &res)) {
+		WARN_ON(1);
+		return;
+	}
+
+	hpet_address = res.start;
+#endif
+}
+
+/*
+ * Set up the local APIC from the "intel,ce4100-lapic" node.
+ *
+ * Compiled to an empty function without CONFIG_X86_LOCAL_APIC. Returns
+ * early when the node is missing, when its address cannot be obtained
+ * (with a warning) or when the APIC is not present and cannot be force
+ * enabled.
+ */
+static void __init dtb_lapic_setup(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ struct device_node *dn;
+ struct resource r;
+ int ret;
+
+ dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic");
+ if (!dn)
+ return;
+
+ ret = of_address_to_resource(dn, 0, &r);
+ if (WARN_ON(ret))
+ return;
+
+ /* Did the boot loader setup the local APIC ? */
+ if (!cpu_has_apic) {
+ if (apic_force_enable(r.start))
+ return;
+ }
+ smp_found_config = 1;
+ pic_mode = 1;
+ /* the address from the device tree becomes the local APIC address */
+ register_lapic_address(r.start);
+ /* register the boot CPU using the version read from the APIC itself */
+ generic_processor_info(boot_cpu_physical_apicid,
+ GET_APIC_VERSION(apic_read(APIC_LVR)));
+#endif
+}
+
+#ifdef CONFIG_X86_IO_APIC
+/* Last id passed to mp_register_ioapic(); incremented before each use. */
+static unsigned int ioapic_id;
+
+/*
+ * Register the IO-APIC described by @dn, using the start of its first
+ * address resource and the current gsi_top. Logs an error and registers
+ * nothing when the node has no usable address.
+ */
+static void __init dtb_add_ioapic(struct device_node *dn)
+{
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		printk(KERN_ERR "Can't obtain address from node %s.\n",
+		       dn->full_name);
+		return;
+	}
+
+	ioapic_id++;
+	mp_register_ioapic(ioapic_id, res.start, gsi_top);
+}
+
+/*
+ * Register every "intel,ce4100-ioapic" node and flag of_ioapic when at
+ * least one IO-APIC ended up registered; log an error otherwise.
+ */
+static void __init dtb_ioapic_setup(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "intel,ce4100-ioapic")
+		dtb_add_ioapic(np);
+
+	if (!nr_ioapics) {
+		printk(KERN_ERR "Error: No information about IO-APIC in OF.\n");
+		return;
+	}
+
+	of_ioapic = 1;
+}
+#else
+static void __init dtb_ioapic_setup(void) {}
+#endif
+
+/* Set up the local APIC first, then the IO-APIC(s), from the device tree. */
+static void __init dtb_apic_setup(void)
+{
+ dtb_lapic_setup();
+ dtb_ioapic_setup();
+}
+
+#ifdef CONFIG_OF_FLATTREE
+/*
+ * Copy the boot-loader supplied device tree blob into bootmem and
+ * unflatten it. Does nothing when initial_dtb is 0.
+ *
+ * NOTE(review): the results of early_memremap() are dereferenced without
+ * being checked.
+ */
+static void __init x86_flattree_get_config(void)
+{
+ u32 size, map_len;
+ void *new_dtb;
+
+ if (!initial_dtb)
+ return;
+
+ /*
+  * First mapping: the rest of the page initial_dtb points into, or the
+  * size of the blob header if that is larger.
+  */
+ map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK),
+ (u64)sizeof(struct boot_param_header));
+
+ initial_boot_params = early_memremap(initial_dtb, map_len);
+ size = be32_to_cpu(initial_boot_params->totalsize);
+ /* remap with the real size when the blob does not fit the first mapping */
+ if (map_len < size) {
+ early_iounmap(initial_boot_params, map_len);
+ initial_boot_params = early_memremap(initial_dtb, size);
+ map_len = size;
+ }
+
+ /* copy the blob, then drop the temporary mapping */
+ new_dtb = alloc_bootmem(size);
+ memcpy(new_dtb, initial_boot_params, size);
+ early_iounmap(initial_boot_params, map_len);
+
+ /* from here on the bootmem copy is the blob everybody uses */
+ initial_boot_params = new_dtb;
+
+ /* root level address cells */
+ of_scan_flat_dt(early_init_dt_scan_root, NULL);
+
+ unflatten_device_tree();
+}
+#else
+static inline void x86_flattree_get_config(void) { }
+#endif
+
+/*
+ * Entry point of the x86 device tree setup: fetch and unflatten the blob,
+ * then, only if a tree has been populated, configure the HPET and the
+ * APICs from it.
+ */
+void __init x86_dtb_init(void)
+{
+	x86_flattree_get_config();
+
+	if (of_have_populated_dt()) {
+		dtb_setup_hpet();
+		dtb_apic_setup();
+	}
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+/*
+ * One way of wiring an interrupt to the IO-APIC: the IRQ_TYPE_* value
+ * reported to the caller of ioapic_xlate() plus the trigger and polarity
+ * values passed to set_io_apic_irq_attr().
+ */
+struct of_ioapic_type {
+ u32 out_type;
+ u32 trigger;
+ u32 polarity;
+};
+
+/*
+ * Indexed by the second cell of the interrupt specifier (see
+ * ioapic_xlate()): 0 = rising edge, 1 = level low, 2 = level high,
+ * 3 = falling edge.
+ */
+static struct of_ioapic_type of_ioapic_type[] =
+{
+ {
+ .out_type = IRQ_TYPE_EDGE_RISING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_LOW,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 0,
+ },
+ {
+ .out_type = IRQ_TYPE_LEVEL_HIGH,
+ .trigger = IOAPIC_LEVEL,
+ .polarity = 1,
+ },
+ {
+ .out_type = IRQ_TYPE_EDGE_FALLING,
+ .trigger = IOAPIC_EDGE,
+ .polarity = 0,
+ },
+};
+
+/*
+ * xlate() callback of the IO-APIC interrupt domains.
+ *
+ * @id:        domain; ->priv holds the IO-APIC index stored by
+ *             ioapic_add_ofnode()
+ * @intspec:   specifier cells: [0] = line on that IO-APIC, [1] = index
+ *             into of_ioapic_type[]
+ * @intsize:   number of cells, at least 2
+ * @out_hwirq: receives line + gsi_base of the IO-APIC
+ * @out_type:  receives the IRQ_TYPE_* value of the selected table entry
+ *
+ * Returns -EINVAL for a short specifier or an out-of-range type index,
+ * otherwise the result of io_apic_setup_irq_pin(). Note that *out_hwirq
+ * is already written when the type index turns out to be invalid.
+ */
+static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
+ u32 *out_hwirq, u32 *out_type)
+{
+ struct io_apic_irq_attr attr;
+ struct of_ioapic_type *it;
+ u32 line, idx, type;
+
+ if (intsize < 2)
+ return -EINVAL;
+
+ line = *intspec;
+ idx = (u32) id->priv;
+ *out_hwirq = line + mp_gsi_routing[idx].gsi_base;
+
+ intspec++;
+ type = *intspec;
+
+ if (type >= ARRAY_SIZE(of_ioapic_type))
+ return -EINVAL;
+
+ it = of_ioapic_type + type;
+ *out_type = it->out_type;
+
+ set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
+
+ /* the pin is always set up with the node of CPU 0 */
+ return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr);
+}
+
+/*
+ * Create an interrupt domain for the IO-APIC node @np.
+ *
+ * The node is matched by address against the already registered IO-APICs;
+ * the index of the match is stored in the domain's ->priv for
+ * ioapic_xlate(). Logs an error when the node has no usable address or
+ * when no registered IO-APIC lives at that address.
+ */
+static void __init ioapic_add_ofnode(struct device_node *np)
+{
+	struct irq_domain *domain;
+	struct resource res;
+	int idx;
+
+	if (of_address_to_resource(np, 0, &res)) {
+		printk(KERN_ERR "Failed to obtain address for %s\n",
+		       np->full_name);
+		return;
+	}
+
+	for (idx = 0; idx < nr_ioapics; idx++) {
+		if (res.start != mp_ioapics[idx].apicaddr)
+			continue;
+
+		domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+		BUG_ON(!domain);
+		domain->controller = np;
+		domain->xlate = ioapic_xlate;
+		domain->priv = (void *)idx;
+		add_interrupt_host(domain);
+		return;
+	}
+
+	printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name);
+}
+
+/*
+ * Create an interrupt domain for every "interrupt-controller" node that is
+ * compatible with "intel,ce4100-ioapic". Does nothing when no device tree
+ * has been populated.
+ */
+void __init x86_add_irq_domains(void)
+{
+	struct device_node *np;
+
+	if (!of_have_populated_dt())
+		return;
+
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (!of_device_is_compatible(np, "intel,ce4100-ioapic"))
+			continue;
+		ioapic_add_ofnode(np);
+	}
+}
+#else
+void __init x86_add_irq_domains(void) { }
+#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1b..220a1c11cfde 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-void notrace __kprobes
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
-{
- unsigned long flags;
-
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
-
- /*
- * We are in trouble anyway, lets at least try
- * to get a message out.
- */
- flags = oops_begin();
- printk(KERN_EMERG "%s", str);
- printk(" on CPU%d, ip %08lx, registers:\n",
- smp_processor_id(), regs->ip);
- show_registers(regs);
- oops_end(flags, regs, 0);
- if (do_panic || panic_on_oops)
- panic("Non maskable interrupt");
- nmi_exit();
- local_irq_enable();
- do_exit(SIGBUS);
-}
-
static int __init oops_setup(char *s)
{
if (!s)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 294f26da0c0c..cdf5bfd9d4d5 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -667,21 +667,15 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
+void __init parse_e820_ext(struct setup_data *sdata)
{
- u32 map_len;
int entries;
struct e820entry *extmap;
entries = sdata->len / sizeof(struct e820entry);
- map_len = sdata->len + sizeof(struct setup_data);
- if (map_len > PAGE_SIZE)
- sdata = early_ioremap(pa_data, map_len);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
- if (map_len > PAGE_SIZE)
- early_iounmap(sdata, map_len);
printk(KERN_INFO "extended physical RAM map:\n");
e820_print_map("extended");
}
@@ -847,15 +841,21 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
- }
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
#endif
+ }
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 76b8cd953dee..9efbdcc56425 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -143,15 +143,10 @@ static void __init ati_bugs(int num, int slot, int func)
static u32 __init ati_sbx00_rev(int num, int slot, int func)
{
- u32 old, d;
+ u32 d;
- d = read_pci_config(num, slot, func, 0x70);
- old = d;
- d &= ~(1<<8);
- write_pci_config(num, slot, func, 0x70, d);
d = read_pci_config(num, slot, func, 0x8);
d &= 0xff;
- write_pci_config(num, slot, func, 0x70, old);
return d;
}
@@ -160,13 +155,16 @@ static void __init ati_bugs_contd(int num, int slot, int func)
{
u32 d, rev;
- if (acpi_use_timer_override)
- return;
-
rev = ati_sbx00_rev(num, slot, func);
+ if (rev >= 0x40)
+ acpi_fix_pin2_polarity = 1;
+
if (rev > 0x13)
return;
+ if (acpi_use_timer_override)
+ return;
+
/* check for IRQ0 interrupt swap */
d = read_pci_config(num, slot, func, 0x64);
if (!(d & (1<<14)))
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 9ca3b0e343e5..5c1a91974918 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -65,6 +65,8 @@
#define sysexit_audit syscall_exit_work
#endif
+ .section .entry.text, "ax"
+
/*
* We use macros for low-level operations which need to be overridden
* for paravirtualization. The following will never clobber any registers:
@@ -395,7 +397,7 @@ sysenter_past_esp:
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone)
*/
.section .init.rodata,"a"
ENTRY(interrupt)
-.text
+.section .entry.text, "ax"
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif
.previous
.long 1b
- .text
+ .section .entry.text, "ax"
vector=vector+1
.endif
.endr
@@ -1409,8 +1411,7 @@ END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
- pushl $do_async_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC
END(async_page_fault)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..b72b4a6466a9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -61,6 +61,8 @@
#define __AUDIT_ARCH_LE 0x40000000
.code64
+ .section .entry.text, "ax"
+
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn)
*/
.section .init.rodata,"a"
ENTRY(interrupt)
- .text
+ .section .entry.text
.p2align 5
.p2align CONFIG_X86_L1_CACHE_SHIFT
ENTRY(irq_entries_start)
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR
.endif
.previous
.quad 1b
- .text
+ .section .entry.text
vector=vector+1
.endif
.endr
@@ -975,9 +977,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
.endr
#endif
@@ -1248,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
decl PER_CPU_VAR(irq_count)
jmp error_exit
CFI_ENDPROC
-END(do_hypervisor_callback)
+END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 382eb2936d4d..a93742a57468 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
return;
}
- if (ftrace_push_return_trace(old, self_addr, &trace.depth,
- frame_pointer) == -EBUSY) {
- *parent = old;
- return;
- }
-
trace.func = self_addr;
+ trace.depth = current->curr_ret_stack + 1;
/* Only trace if the calling function expects to */
if (!ftrace_graph_entry(&trace)) {
- current->curr_ret_stack--;
*parent = old;
+ return;
+ }
+
+ if (ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer) == -EBUSY) {
+ *parent = old;
+ return;
}
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 767d6c43de37..ce0be7cd085e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
*/
KERNEL_PAGES = LOWMEM_PAGES
-INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
+INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE
RESERVE_BRK(pagetables, INIT_MAP_SIZE)
/*
@@ -137,7 +137,7 @@ ENTRY(startup_32)
movsl
1:
-#ifdef CONFIG_OLPC_OPENFIRMWARE
+#ifdef CONFIG_OLPC
/* save OFW's pgdir table for later use when calling into OFW */
movl %cr3, %eax
movl %eax, pa(olpc_ofw_pgd)
@@ -623,7 +623,7 @@ ENTRY(initial_code)
* BSS section
*/
__PAGE_ALIGNED_BSS
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
#ifdef CONFIG_X86_PAE
initial_pg_pmd:
.fill 1024*KPMDS,4,0
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir)
#ifdef CONFIG_X86_PAE
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
@@ -662,7 +662,7 @@ ENTRY(initial_page_table)
# else
# error "Kernel PMDs should be 1, 2 or 3"
# endif
- .align PAGE_SIZE_asm /* needs to be page-sized too */
+ .align PAGE_SIZE /* needs to be page-sized too */
#endif
.data
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 4ff5968f12d2..bfe8f729e086 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev)
if (!irq)
return -EINVAL;
- set_irq_data(irq, dev);
+ irq_set_handler_data(irq, dev);
if (hpet_setup_msi_irq(irq))
return -EINVAL;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 20757cb2efa3..d9ca749c123b 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq)
{
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
- set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
+ irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
i8259A_chip.name);
enable_irq(irq);
}
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 8eec0ec59af2..8c968974253d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -14,22 +14,9 @@
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/syscalls.h>
+#include <linux/bitmap.h>
#include <asm/syscalls.h>
-/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
-static void set_bitmap(unsigned long *bitmap, unsigned int base,
- unsigned int extent, int new_value)
-{
- unsigned int i;
-
- for (i = base; i < base + extent; i++) {
- if (new_value)
- __set_bit(i, bitmap);
- else
- __clear_bit(i, bitmap);
- }
-}
-
/*
* this changes the io permissions bitmap in the current task.
*/
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
*/
tss = &per_cpu(init_tss, get_cpu());
- set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+ if (turn_on)
+ bitmap_clear(t->io_bitmap_ptr, from, num);
+ else
+ bitmap_set(t->io_bitmap_ptr, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 387b6a0c9e81..948a31eae75f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq)
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
- * /proc/interrupts printing:
+ * /proc/interrupts printing for arch specific interrupts
*/
-static int show_other_interrupts(struct seq_file *p, int prec)
+int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
@@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec)
return 0;
}
-int show_interrupts(struct seq_file *p, void *v)
-{
- unsigned long flags, any_count = 0;
- int i = *(loff_t *) v, j, prec;
- struct irqaction *action;
- struct irq_desc *desc;
-
- if (i > nr_irqs)
- return 0;
-
- for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
- j *= 10;
-
- if (i == nr_irqs)
- return show_other_interrupts(p, prec);
-
- /* print header */
- if (i == 0) {
- seq_printf(p, "%*s", prec + 8, "");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d", j);
- seq_putc(p, '\n');
- }
-
- desc = irq_to_desc(i);
- if (!desc)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- for_each_online_cpu(j)
- any_count |= kstat_irqs_cpu(i, j);
- action = desc->action;
- if (!action && !any_count)
- goto out;
-
- seq_printf(p, "%*d: ", prec, i);
- for_each_online_cpu(j)
- seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->irq_data.chip->name);
- seq_printf(p, "-%-8s", desc->name);
-
- if (action) {
- seq_printf(p, " %s", action->name);
- while ((action = action->next) != NULL)
- seq_printf(p, ", %s", action->name);
- }
-
- seq_putc(p, '\n');
-out:
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
-}
-
/*
* /proc/stat helpers
*/
@@ -276,15 +223,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
-#ifdef CONFIG_OF
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- return intspec[0];
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
void fixup_irqs(void)
@@ -293,6 +231,7 @@ void fixup_irqs(void)
static int warned;
struct irq_desc *desc;
struct irq_data *data;
+ struct irq_chip *chip;
for_each_irq_desc(irq, desc) {
int break_affinity = 0;
@@ -307,10 +246,10 @@ void fixup_irqs(void)
/* interrupt's are disabled at this point */
raw_spin_lock(&desc->lock);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
affinity = data->affinity;
if (!irq_has_action(irq) ||
- cpumask_equal(affinity, cpu_online_mask)) {
+ cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
continue;
}
@@ -327,16 +266,17 @@ void fixup_irqs(void)
affinity = cpu_all_mask;
}
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
- data->chip->irq_mask(data);
+ chip = irq_data_get_irq_chip(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
+ chip->irq_mask(data);
- if (data->chip->irq_set_affinity)
- data->chip->irq_set_affinity(data, affinity, true);
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(data, affinity, true);
else if (!(warned++))
set_affinity = 0;
- if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
- data->chip->irq_unmask(data);
+ if (!irqd_can_move_in_process_context(data) && chip->irq_unmask)
+ chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
@@ -368,10 +308,11 @@ void fixup_irqs(void)
irq = __this_cpu_read(vector_irq[vector]);
desc = irq_to_desc(irq);
- data = &desc->irq_data;
+ data = irq_desc_get_irq_data(desc);
+ chip = irq_data_get_irq_chip(data);
raw_spin_lock(&desc->lock);
- if (data->chip->irq_retrigger)
- data->chip->irq_retrigger(data);
+ if (chip->irq_retrigger)
+ chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
}
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e973958d..f470e4ef993e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -25,6 +25,7 @@
#include <asm/setup.h>
#include <asm/i8259.h>
#include <asm/traps.h>
+#include <asm/prom.h>
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -71,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id)
static struct irqaction fpu_irq = {
.handler = math_error_irq,
.name = "fpu",
+ .flags = IRQF_NO_THREAD,
};
#endif
@@ -80,6 +82,7 @@ static struct irqaction fpu_irq = {
static struct irqaction irq2 = {
.handler = no_action,
.name = "cascade",
+ .flags = IRQF_NO_THREAD,
};
DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
@@ -110,7 +113,7 @@ void __init init_ISA_irqs(void)
legacy_pic->init(0);
for (i = 0; i < legacy_pic->nr_legacy_irqs; i++)
- set_irq_chip_and_handler_name(i, chip, handle_level_irq, name);
+ irq_set_chip_and_handler_name(i, chip, handle_level_irq, name);
}
void __init init_IRQ(void)
@@ -118,6 +121,12 @@ void __init init_IRQ(void)
int i;
/*
+ * We probably need a better place for this, but it works for
+ * now ...
+ */
+ x86_add_irq_domains();
+
+ /*
* On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15.
* If these IRQ's are handled by legacy interrupt-controllers like PIC,
* then this configuration will likely be static after the boot. If
@@ -164,14 +173,77 @@ static void __init smp_intr_init(void)
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPIs for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+#define ALLOC_INVTLB_VEC(NR) \
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
+ invalidate_interrupt##NR)
+
+ switch (NUM_INVALIDATE_TLB_VECTORS) {
+ default:
+ ALLOC_INVTLB_VEC(31);
+ case 31:
+ ALLOC_INVTLB_VEC(30);
+ case 30:
+ ALLOC_INVTLB_VEC(29);
+ case 29:
+ ALLOC_INVTLB_VEC(28);
+ case 28:
+ ALLOC_INVTLB_VEC(27);
+ case 27:
+ ALLOC_INVTLB_VEC(26);
+ case 26:
+ ALLOC_INVTLB_VEC(25);
+ case 25:
+ ALLOC_INVTLB_VEC(24);
+ case 24:
+ ALLOC_INVTLB_VEC(23);
+ case 23:
+ ALLOC_INVTLB_VEC(22);
+ case 22:
+ ALLOC_INVTLB_VEC(21);
+ case 21:
+ ALLOC_INVTLB_VEC(20);
+ case 20:
+ ALLOC_INVTLB_VEC(19);
+ case 19:
+ ALLOC_INVTLB_VEC(18);
+ case 18:
+ ALLOC_INVTLB_VEC(17);
+ case 17:
+ ALLOC_INVTLB_VEC(16);
+ case 16:
+ ALLOC_INVTLB_VEC(15);
+ case 15:
+ ALLOC_INVTLB_VEC(14);
+ case 14:
+ ALLOC_INVTLB_VEC(13);
+ case 13:
+ ALLOC_INVTLB_VEC(12);
+ case 12:
+ ALLOC_INVTLB_VEC(11);
+ case 11:
+ ALLOC_INVTLB_VEC(10);
+ case 10:
+ ALLOC_INVTLB_VEC(9);
+ case 9:
+ ALLOC_INVTLB_VEC(8);
+ case 8:
+ ALLOC_INVTLB_VEC(7);
+ case 7:
+ ALLOC_INVTLB_VEC(6);
+ case 6:
+ ALLOC_INVTLB_VEC(5);
+ case 5:
+ ALLOC_INVTLB_VEC(4);
+ case 4:
+ ALLOC_INVTLB_VEC(3);
+ case 3:
+ ALLOC_INVTLB_VEC(2);
+ case 2:
+ ALLOC_INVTLB_VEC(1);
+ case 1:
+ ALLOC_INVTLB_VEC(0);
+ break;
+ }
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -243,7 +315,7 @@ void __init native_init_IRQ(void)
set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
}
- if (!acpi_ioapic)
+ if (!acpi_ioapic && !of_ioapic)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028a..7c64c420a9f6 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMIWATCHDOG:
- if (atomic_read(&kgdb_active) != -1) {
- /* KGDB CPU roundup: */
- kgdb_nmicallback(raw_smp_processor_id(), regs);
- return NOTIFY_STOP;
- }
- /* Enter debugger: */
- break;
-
case DIE_DEBUG:
if (atomic_read(&kgdb_cpu_doing_single_step) != -1) {
if (user_mode(regs))
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index d91c477b3f62..c969fd9d1566 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr)
if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
return 0;
+ /*
+ * Do not optimize in the entry code due to the unstable
+ * stack handling.
+ */
+ if ((paddr >= (unsigned long )__entry_text_start) &&
+ (paddr < (unsigned long )__entry_text_end))
+ return 0;
+
/* Check there is enough space for a relative jump. */
if (size - offset < RELATIVEJUMP_SIZE)
return 0;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c38..c5610384ab16 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE 2048
#define UCODE_CONTAINER_SECTION_HDR 8
#define UCODE_CONTAINER_HEADER_SIZE 12
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
return -1;
}
+
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
+
return 0;
}
-static int get_matching_microcode(int cpu, void *mc, int rev)
+static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
+ int rev)
{
- struct microcode_header_amd *mc_header = mc;
unsigned int current_cpu_id;
u16 equiv_cpu_id = 0;
unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
if (!equiv_cpu_id)
return 0;
- if (mc_header->processor_rev_id != equiv_cpu_id)
+ if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
/* ucode might be chipset specific -- currently we don't support this */
- if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}
- if (mc_header->patch_id <= rev)
+ if (mc_hdr->patch_id <= rev)
return 0;
return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
return -1;
}
- pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
}
-static void *
-get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
{
- unsigned int total_size;
- u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
- void *mc;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
- get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
+ actual_size = buf[4] + (buf[5] << 8);
- if (section_hdr[0] != UCODE_UCODE_TYPE) {
- pr_err("error: invalid type field in container file section header\n");
- return NULL;
+ if (actual_size > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
}
- total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
+ return actual_size;
+}
- if (total_size > size || total_size > UCODE_MAX_SIZE) {
- pr_err("error: size mismatch\n");
- return NULL;
+static struct microcode_header_amd *
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
+{
+ struct microcode_header_amd *mc = NULL;
+ unsigned int actual_size = 0;
+
+ if (buf[0] != UCODE_UCODE_TYPE) {
+ pr_err("invalid type field in container file section header\n");
+ goto out;
}
- mc = vzalloc(UCODE_MAX_SIZE);
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
+ goto out;
+
+ mc = vzalloc(actual_size);
if (!mc)
- return NULL;
+ goto out;
- get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
+ *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
+out:
return mc;
}
static int install_equiv_cpu_table(const u8 *buf)
{
- u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
- unsigned int *buf_pos = (unsigned int *)container_hdr;
- unsigned long size;
-
- get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
-
- size = buf_pos[2];
-
- if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- pr_err("error: invalid type field in container file section header\n");
- return 0;
+ unsigned int *ibuf = (unsigned int *)buf;
+ unsigned int type = ibuf[1];
+ unsigned int size = ibuf[2];
+
+ if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+ pr_err("empty section/"
+ "invalid type field in container file section header\n");
+ return -EINVAL;
}
equiv_cpu_table = vmalloc(size);
if (!equiv_cpu_table) {
pr_err("failed to allocate equivalent CPU table\n");
- return 0;
+ return -ENOMEM;
}
- buf += UCODE_CONTAINER_HEADER_SIZE;
- get_ucode_data(equiv_cpu_table, buf, size);
+ get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
}
@@ -223,16 +244,16 @@ static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct microcode_header_amd *mc_hdr = NULL;
+ unsigned int mc_size, leftover;
+ int offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
- void *mc;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover;
- unsigned long offset;
+ unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_OK;
offset = install_equiv_cpu_table(ucode_ptr);
- if (!offset) {
+ if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
leftover = size - offset;
while (leftover) {
- unsigned int uninitialized_var(mc_size);
- struct microcode_header_amd *mc_header;
-
- mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
- if (!mc)
+ mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
+ if (!mc_hdr)
break;
- mc_header = (struct microcode_header_amd *)mc;
- if (get_matching_microcode(cpu, mc, new_rev)) {
+ if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
vfree(new_mc);
- new_rev = mc_header->patch_id;
- new_mc = mc;
+ new_rev = mc_hdr->patch_id;
+ new_mc = mc_hdr;
} else
- vfree(mc);
+ vfree(mc_hdr);
ucode_ptr += mc_size;
leftover -= mc_size;
}
- if (new_mc) {
- if (!leftover) {
- vfree(uci->mc);
- uci->mc = new_mc;
- pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
- } else {
- vfree(new_mc);
- state = UCODE_ERROR;
- }
- } else
+ if (!new_mc) {
state = UCODE_NFOUND;
+ goto free_table;
+ }
+ if (!leftover) {
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+ cpu, uci->cpu_sig.rev, new_rev);
+ } else {
+ vfree(new_mc);
+ state = UCODE_ERROR;
+ }
+
+free_table:
free_equiv_cpu_table();
return state;
}
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
- enum ucode_state ret;
+ const struct firmware *fw;
+ enum ucode_state ret = UCODE_NFOUND;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
- return UCODE_NFOUND;
+ if (request_firmware(&fw, fw_name, device)) {
+ pr_err("failed to load file %s\n", fw_name);
+ goto out;
}
- if (*(u32 *)firmware->data != UCODE_MAGIC) {
- pr_err("invalid UCODE_MAGIC (0x%08x)\n",
- *(u32 *)firmware->data);
- return UCODE_ERROR;
+ ret = UCODE_ERROR;
+ if (*(u32 *)fw->data != UCODE_MAGIC) {
+ pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+ goto fw_release;
}
- ret = generic_load_microcode(cpu, firmware->data, firmware->size);
+ ret = generic_load_microcode(cpu, fw->data, fw->size);
- release_firmware(firmware);
+fw_release:
+ release_firmware(fw);
+out:
return ret;
}
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
- .request_microcode_fw = request_microcode_fw,
+ .request_microcode_fw = request_microcode_amd,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2bac..87af68e0e1e1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR)
- err = -EINVAL;
+ if (microcode_init_cpu(cpu) == UCODE_ERROR) {
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ return -EINVAL;
+ }
return err;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff4554198981..99fa3adf0141 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -110,12 +110,9 @@ void show_regs_common(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- printk(KERN_CONT " ");
- printk(KERN_CONT "%s %s", vendor, product);
- if (board) {
- printk(KERN_CONT "/");
- printk(KERN_CONT "%s", board);
- }
+ printk(KERN_CONT " %s %s", vendor, product);
+ if (board)
+ printk(KERN_CONT "/%s", board);
printk(KERN_CONT "\n");
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 10c6619c0543..d3ce37edb54d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -285,6 +285,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
},
},
+ { /* Handle problems with rebooting on VersaLogic Menlow boards */
+ .callback = set_bios_reboot,
+ .ident = "VersaLogic Menlow based board",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
+ DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 6f39cab052d5..3f2ad2640d85 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -6,6 +6,7 @@
#include <linux/acpi.h>
#include <linux/bcd.h>
#include <linux/pnp.h>
+#include <linux/of.h>
#include <asm/vsyscall.h>
#include <asm/x86_init.h>
@@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void)
}
}
#endif
+ if (of_have_populated_dt())
+ return 0;
platform_device_register(&rtc_device);
dev_info(&rtc_device.dev,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a089fc19ffae..9d43b28e0728 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,7 @@
#endif
#include <asm/mce.h>
#include <asm/alternative.h>
+#include <asm/prom.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@@ -293,10 +294,32 @@ static void __init init_gbpages(void)
else
direct_gbpages = 0;
}
+
+static void __init cleanup_highmap_brk_end(void)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ mmu_cr4_features = read_cr4();
+
+ /*
+ * _brk_end cannot change anymore, but it and _end may be
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+ pmd_clear(pmd);
+}
#else
static inline void init_gbpages(void)
{
}
+static inline void cleanup_highmap_brk_end(void)
+{
+}
#endif
static void __init reserve_brk(void)
@@ -307,6 +330,8 @@ static void __init reserve_brk(void)
/* Mark brk area as locked down and no longer taking any
new allocations */
_brk_start = 0;
+
+ cleanup_highmap_brk_end();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -429,16 +454,30 @@ static void __init parse_setup_data(void)
return;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- data = early_memremap(pa_data, PAGE_SIZE);
+ u32 data_len, map_len;
+
+ map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
+ (u64)sizeof(struct setup_data));
+ data = early_memremap(pa_data, map_len);
+ data_len = data->len + sizeof(struct setup_data);
+ if (data_len > map_len) {
+ early_iounmap(data, map_len);
+ data = early_memremap(pa_data, data_len);
+ map_len = data_len;
+ }
+
switch (data->type) {
case SETUP_E820_EXT:
- parse_e820_ext(data, pa_data);
+ parse_e820_ext(data);
+ break;
+ case SETUP_DTB:
+ add_dtb(pa_data);
break;
default:
break;
}
pa_data = data->next;
- early_iounmap(data, PAGE_SIZE);
+ early_iounmap(data, map_len);
}
}
@@ -680,15 +719,6 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
-static u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -704,8 +734,6 @@ static u64 __init get_max_mapped(void)
void __init setup_arch(char **cmdline_p)
{
- int acpi = 0;
- int amd = 0;
unsigned long flags;
#ifdef CONFIG_X86_32
@@ -977,19 +1005,7 @@ void __init setup_arch(char **cmdline_p)
early_acpi_boot_init();
-#ifdef CONFIG_ACPI_NUMA
- /*
- * Parse SRAT to discover nodes.
- */
- acpi = acpi_numa_init();
-#endif
-
-#ifdef CONFIG_AMD_NUMA
- if (!acpi)
- amd = !amd_numa_init(0, max_pfn);
-#endif
-
- initmem_init(0, max_pfn, acpi, amd);
+ initmem_init();
memblock_find_dma_reserve();
dma32_reserve_bootmem();
@@ -1022,8 +1038,8 @@ void __init setup_arch(char **cmdline_p)
* Read APIC and some other early information from ACPI tables.
*/
acpi_boot_init();
-
sfi_init();
+ x86_dtb_init();
/*
* get boot-time SMP configuration:
@@ -1033,9 +1049,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
-#ifdef CONFIG_X86_64
init_cpu_to_node();
-#endif
init_apic_mappings();
ioapic_and_gsi_init();
@@ -1059,6 +1073,8 @@ void __init setup_arch(char **cmdline_p)
#endif
x86_init.oem.banner();
+ x86_init.timers.wallclock_init();
+
mcheck_init();
local_irq_save(flags);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f73..71f4727da373 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
+#ifdef CONFIG_X86_32
+ per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
+#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
*/
set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
-#endif
/*
* Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_X86_32
+ early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
+#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 545273369efa..c2871d3c71b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include <asm/apic.h>
+#include <asm/io_apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
#include <linux/mc146818rtc.h>
@@ -71,10 +72,6 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
-#endif
-
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
- printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node++)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = 0;
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-#endif
-
-#ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
- int node = apic->apicid_to_node(apicid);
-
- if (!node_online(node))
- node = first_online_node;
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid() do {} while (0)
-#endif
-
/*
* Report back to the Boot Processor.
* Running on AP.
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
- map_cpu_to_logical_apicid();
/*
* Need to setup vector mappings before we enable interrupts.
@@ -355,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -381,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -389,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -414,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -947,6 +870,14 @@ int __cpuinit native_cpu_up(unsigned int cpu)
return 0;
}
+/**
+ * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ */
+void arch_disable_smp_support(void)
+{
+ disable_ioapic_support();
+}
+
/*
* Fall back to non SMP mode after errors.
*
@@ -962,7 +893,6 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- map_cpu_to_logical_apicid();
cpumask_set_cpu(0, cpu_sibling_mask(0));
cpumask_set_cpu(0, cpu_core_mask(0));
}
@@ -1047,7 +977,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
"(tell your hw vendor)\n");
}
smpboot_clear_io_apic();
- arch_disable_smp_support();
+ disable_ioapic_support();
return -1;
}
@@ -1091,21 +1021,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
- boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
+
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -1141,8 +1069,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
bsp_end_local_APIC_setup();
- map_cpu_to_logical_apicid();
-
if (apic->setup_portio_remap)
apic->setup_portio_remap();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..5f181742e8f9 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,6 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_name_to_handle_at
+ .long sys_open_by_handle_at
+ .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index cb2c5069b016..624a2016198e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -105,6 +105,7 @@ SECTIONS
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
+ ENTRY_TEXT
IRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
@@ -230,7 +231,7 @@ SECTIONS
* output PHDR, so the next output section - .init.text - should
* start another segment - init.
*/
- PERCPU_VADDR(0, :percpu)
+ PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
#endif
INIT_TEXT_SECTION(PAGE_SIZE)
@@ -318,7 +319,7 @@ SECTIONS
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ceb2911aa439..c11514e9128b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
+ .wallclock_init = x86_init_noop,
},
.iommu = {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 54ce246a383e..63fec1531e89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2777,6 +2777,8 @@ static int dr_interception(struct vcpu_svm *svm)
kvm_register_write(&svm->vcpu, reg, val);
}
+ skip_emulated_instruction(&svm->vcpu);
+
return 1;
}
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 1357d7cf4ec8..db932760ea82 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall,
TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
TP_STRUCT__entry(
- __field( __u16, code )
- __field( bool, fast )
__field( __u16, rep_cnt )
__field( __u16, rep_idx )
__field( __u64, ingpa )
__field( __u64, outgpa )
+ __field( __u16, code )
+ __field( bool, fast )
),
TP_fast_assign(
- __entry->code = code;
- __entry->fast = fast;
__entry->rep_cnt = rep_cnt;
__entry->rep_idx = rep_idx;
__entry->ingpa = ingpa;
__entry->outgpa = outgpa;
+ __entry->code = code;
+ __entry->fast = fast;
),
TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0c..b9ec1c74943c 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void)
void lguest_setup_irq(unsigned int irq)
{
irq_alloc_desc_at(irq, 0);
- set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
+ irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
handle_level_irq, "level");
}
@@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
static void lguest_time_init(void)
{
/* Set up the timer interrupt (0) to go to our simple timer routine */
- set_irq_handler(0, lguest_time_irq);
+ irq_set_handler(0, lguest_time_irq);
clocksource_register(&lguest_clock);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index e10cf070ede0..f2479f19ddde 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -42,4 +42,5 @@ else
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o
+ lib-y += cmpxchg16b_emu.o
endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index 2cda60a06e65..e8e7e0d06f42 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -15,14 +15,12 @@
/* if you want SMP support, implement these with real spinlocks */
.macro LOCK reg
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
+ pushfl_cfi
cli
.endm
.macro UNLOCK reg
- popfl
- CFI_ADJUST_CFA_OFFSET -4
+ popfl_cfi
.endm
#define BEGIN(op) \
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 71e080de3352..391a083674b4 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -14,14 +14,12 @@
#include <asm/dwarf2.h>
.macro SAVE reg
- pushl %\reg
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %\reg
CFI_REL_OFFSET \reg, 0
.endm
.macro RESTORE reg
- popl %\reg
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %\reg
CFI_RESTORE \reg
.endm
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index adbccd0bbb78..78d16a554db0 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -132,11 +130,9 @@ ENTRY(csum_partial)
jz 8f
roll $8, %eax
8:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -148,11 +144,9 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
@@ -260,11 +254,9 @@ ENTRY(csum_partial)
jz 90f
roll $8, %eax
90:
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
ret
CFI_ENDPROC
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) )
.previous
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ecx # equivalent to addl $4,%esp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
ret
CFI_ENDPROC
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
new file mode 100644
index 000000000000..3e8b08a6de2b
--- /dev/null
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -0,0 +1,59 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+.text
+
+/*
+ * Inputs:
+ * %rsi : memory location to compare
+ * %rax : low 64 bits of old value
+ * %rdx : high 64 bits of old value
+ * %rbx : low 64 bits of new value
+ * %rcx : high 64 bits of new value
+ * %al : (output) 1 if the exchange succeeded, 0 otherwise
+ */
+ENTRY(this_cpu_cmpxchg16b_emu)
+CFI_STARTPROC
+
+#
+# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
+# via the ZF. Caller will access %al to get result.
+#
+# Note that this is only useful for a cpuops operation. Meaning that we
+# do *not* have a fully atomic operation but just an operation that is
+# *atomic* on a single cpu (as provided by the this_cpu_xx class of
+# macros).
+#
+this_cpu_cmpxchg16b_emu:
+ pushf
+ cli
+
+ cmpq %gs:(%rsi), %rax
+ jne not_same
+ cmpq %gs:8(%rsi), %rdx
+ jne not_same
+
+ movq %rbx, %gs:(%rsi)
+ movq %rcx, %gs:8(%rsi)
+
+ popf
+ mov $1, %al
+ ret
+
+ not_same:
+ popf
+ xor %al,%al
+ ret
+
+CFI_ENDPROC
+
+ENDPROC(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
+/*
+ * Normally compiler builtins are used, but sometimes the compiler calls out
+ * of line code. Based on asm-i386/string.h.
+ *
+ * This assembly file is re-written from memmove_64.c file.
+ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
+ */
+#define _STRING_C
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+#undef memmove
+
+/*
+ * Implement memmove(). This can handle overlap between src and dst.
+ *
+ * Input:
+ * rdi: dest
+ * rsi: src
+ * rdx: count
+ *
+ * Output:
+ * rax: dest
+ */
+ENTRY(memmove)
+ CFI_STARTPROC
+ /* Handle 32 bytes or more in the loop */
+ mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f
+
+ /* Decide forward/backward copy mode */
+ cmp %rdi, %rsi
+ jb 2f
+
+ /*
+ * The movsq instruction has a high startup latency,
+ * so we handle small sizes with general-purpose registers.
+ */
+ cmp $680, %rdx
+ jb 3f
+ /*
+ * movsq instruction is only good for aligned case.
+ */
+
+ cmpb %dil, %sil
+ je 4f
+3:
+ sub $0x20, %rdx
+ /*
+ * We gobble 32 bytes forward in each loop iteration.
+ */
+5:
+ sub $0x20, %rdx
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r8
+ leaq 4*8(%rsi), %rsi
+
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, 2*8(%rdi)
+ movq %r8, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae 5b
+ addq $0x20, %rdx
+ jmp 1f
+ /*
+ * Handle data forward by movsq.
+ */
+ .p2align 4
+4:
+ movq %rdx, %rcx
+ movq -8(%rsi, %rdx), %r11
+ lea -8(%rdi, %rdx), %r10
+ shrq $3, %rcx
+ rep movsq
+ movq %r11, (%r10)
+ jmp 13f
+ /*
+ * Handle data backward by movsq.
+ */
+ .p2align 4
+7:
+ movq %rdx, %rcx
+ movq (%rsi), %r11
+ movq %rdi, %r10
+ leaq -8(%rsi, %rdx), %rsi
+ leaq -8(%rdi, %rdx), %rdi
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ movq %r11, (%r10)
+ jmp 13f
+
+ /*
+ * Start to prepare for backward copy.
+ */
+ .p2align 4
+2:
+ cmp $680, %rdx
+ jb 6f
+ cmp %dil, %sil
+ je 7b
+6:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * We gobble 32 bytes backward in each loop iteration.
+ */
+8:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r11
+ movq -2*8(%rsi), %r10
+ movq -3*8(%rsi), %r9
+ movq -4*8(%rsi), %r8
+ leaq -4*8(%rsi), %rsi
+
+ movq %r11, -1*8(%rdi)
+ movq %r10, -2*8(%rdi)
+ movq %r9, -3*8(%rdi)
+ movq %r8, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae 8b
+ /*
+ * Calculate copy position to head.
+ */
+ addq $0x20, %rdx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+1:
+ cmpq $16, %rdx
+ jb 9f
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq -2*8(%rsi, %rdx), %r9
+ movq -1*8(%rsi, %rdx), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, -2*8(%rdi, %rdx)
+ movq %r8, -1*8(%rdi, %rdx)
+ jmp 13f
+ .p2align 4
+9:
+ cmpq $8, %rdx
+ jb 10f
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq -1*8(%rsi, %rdx), %r10
+ movq %r11, 0*8(%rdi)
+ movq %r10, -1*8(%rdi, %rdx)
+ jmp 13f
+10:
+ cmpq $4, %rdx
+ jb 11f
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %r11d
+ movl -4(%rsi, %rdx), %r10d
+ movl %r11d, (%rdi)
+ movl %r10d, -4(%rdi, %rdx)
+ jmp 13f
+11:
+ cmp $2, %rdx
+ jb 12f
+ /*
+ * Move data from 2 bytes to 3 bytes.
+ */
+ movw (%rsi), %r11w
+ movw -2(%rsi, %rdx), %r10w
+ movw %r11w, (%rdi)
+ movw %r10w, -2(%rdi, %rdx)
+ jmp 13f
+12:
+ cmp $1, %rdx
+ jb 13f
+ /*
+ * Move data for 1 byte.
+ */
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+13:
+ retq
+ CFI_ENDPROC
+ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void *dest, const void *src, size_t count)
-{
- unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
- char *ret;
-
- __asm__ __volatile__(
- /* Handle more 32bytes in loop */
- "mov %2, %3\n\t"
- "cmp $0x20, %0\n\t"
- "jb 1f\n\t"
-
- /* Decide forward/backward copy mode */
- "cmp %2, %1\n\t"
- "jb 2f\n\t"
-
- /*
- * movsq instruction have many startup latency
- * so we handle small size by general register.
- */
- "cmp $680, %0\n\t"
- "jb 3f\n\t"
- /*
- * movsq instruction is only good for aligned case.
- */
- "cmpb %%dil, %%sil\n\t"
- "je 4f\n\t"
- "3:\n\t"
- "sub $0x20, %0\n\t"
- /*
- * We gobble 32byts forward in each loop.
- */
- "5:\n\t"
- "sub $0x20, %0\n\t"
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq 2*8(%1), %6\n\t"
- "movq 3*8(%1), %7\n\t"
- "leaq 4*8(%1), %1\n\t"
-
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, 2*8(%2)\n\t"
- "movq %7, 3*8(%2)\n\t"
- "leaq 4*8(%2), %2\n\t"
- "jae 5b\n\t"
- "addq $0x20, %0\n\t"
- "jmp 1f\n\t"
- /*
- * Handle data forward by movsq.
- */
- ".p2align 4\n\t"
- "4:\n\t"
- "movq %0, %8\n\t"
- "movq -8(%1, %0), %4\n\t"
- "lea -8(%2, %0), %5\n\t"
- "shrq $3, %8\n\t"
- "rep movsq\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
- /*
- * Handle data backward by movsq.
- */
- ".p2align 4\n\t"
- "7:\n\t"
- "movq %0, %8\n\t"
- "movq (%1), %4\n\t"
- "movq %2, %5\n\t"
- "leaq -8(%1, %0), %1\n\t"
- "leaq -8(%2, %0), %2\n\t"
- "shrq $3, %8\n\t"
- "std\n\t"
- "rep movsq\n\t"
- "cld\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
-
- /*
- * Start to prepare for backward copy.
- */
- ".p2align 4\n\t"
- "2:\n\t"
- "cmp $680, %0\n\t"
- "jb 6f \n\t"
- "cmp %%dil, %%sil\n\t"
- "je 7b \n\t"
- "6:\n\t"
- /*
- * Calculate copy position to tail.
- */
- "addq %0, %1\n\t"
- "addq %0, %2\n\t"
- "subq $0x20, %0\n\t"
- /*
- * We gobble 32byts backward in each loop.
- */
- "8:\n\t"
- "subq $0x20, %0\n\t"
- "movq -1*8(%1), %4\n\t"
- "movq -2*8(%1), %5\n\t"
- "movq -3*8(%1), %6\n\t"
- "movq -4*8(%1), %7\n\t"
- "leaq -4*8(%1), %1\n\t"
-
- "movq %4, -1*8(%2)\n\t"
- "movq %5, -2*8(%2)\n\t"
- "movq %6, -3*8(%2)\n\t"
- "movq %7, -4*8(%2)\n\t"
- "leaq -4*8(%2), %2\n\t"
- "jae 8b\n\t"
- /*
- * Calculate copy position to head.
- */
- "addq $0x20, %0\n\t"
- "subq %0, %1\n\t"
- "subq %0, %2\n\t"
- "1:\n\t"
- "cmpq $16, %0\n\t"
- "jb 9f\n\t"
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq -2*8(%1, %0), %6\n\t"
- "movq -1*8(%1, %0), %7\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, -2*8(%2, %0)\n\t"
- "movq %7, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- ".p2align 4\n\t"
- "9:\n\t"
- "cmpq $8, %0\n\t"
- "jb 10f\n\t"
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq -1*8(%1, %0), %5\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- "10:\n\t"
- "cmpq $4, %0\n\t"
- "jb 11f\n\t"
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- "movl (%1), %4d\n\t"
- "movl -4(%1, %0), %5d\n\t"
- "movl %4d, (%2)\n\t"
- "movl %5d, -4(%2, %0)\n\t"
- "jmp 13f\n\t"
- "11:\n\t"
- "cmp $2, %0\n\t"
- "jb 12f\n\t"
- /*
- * Move data from 2 bytes to 3 bytes.
- */
- "movw (%1), %4w\n\t"
- "movw -2(%1, %0), %5w\n\t"
- "movw %4w, (%2)\n\t"
- "movw %5w, -2(%2, %0)\n\t"
- "jmp 13f\n\t"
- "12:\n\t"
- "cmp $1, %0\n\t"
- "jb 13f\n\t"
- /*
- * Move data for 1 byte.
- */
- "movb (%1), %4b\n\t"
- "movb %4b, (%2)\n\t"
- "13:\n\t"
- : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
- "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
- :"0" (count),
- "1" (src),
- "2" (dest)
- :"memory");
-
- return ret;
-
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S
index 41fcf00e49df..67743977398b 100644
--- a/arch/x86/lib/rwsem_64.S
+++ b/arch/x86/lib/rwsem_64.S
@@ -23,43 +23,50 @@
#include <asm/dwarf2.h>
#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
+ pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
+ pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
+ pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
+ pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
+ pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
+ pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
+ pushq_cfi %r11; CFI_REL_OFFSET r11, 0
#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
+ popq_cfi %r11; CFI_RESTORE r11; \
+ popq_cfi %r10; CFI_RESTORE r10; \
+ popq_cfi %r9; CFI_RESTORE r9; \
+ popq_cfi %r8; CFI_RESTORE r8; \
+ popq_cfi %rcx; CFI_RESTORE rcx; \
+ popq_cfi %rsi; CFI_RESTORE rsi; \
+ popq_cfi %rdi; CFI_RESTORE rdi
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_down_read_failed
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_down_read_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
+ CFI_STARTPROC
save_common_regs
movq %rax,%rdi
call rwsem_down_write_failed
restore_common_regs
ret
- ENDPROC(call_rwsem_down_write_failed)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_down_write_failed)
ENTRY(call_rwsem_wake)
+ CFI_STARTPROC
decl %edx /* do nothing if still outstanding active readers */
jnz 1f
save_common_regs
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake)
call rwsem_wake
restore_common_regs
1: ret
- ENDPROC(call_rwsem_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
+ CFI_STARTPROC
save_common_regs
- pushq %rdx
+ pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
movq %rax,%rdi
call rwsem_downgrade_wake
- popq %rdx
+ popq_cfi %rdx
+ CFI_RESTORE rdx
restore_common_regs
ret
- ENDPROC(call_rwsem_downgrade_wake)
+ CFI_ENDPROC
+ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 648fe4741782..06691daa4108 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -36,7 +36,7 @@
*/
#ifdef CONFIG_SMP
ENTRY(__write_lock_failed)
- CFI_STARTPROC simple
+ CFI_STARTPROC
FRAME
2: LOCK_PREFIX
addl $ RW_LOCK_BIAS,(%eax)
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed)
/* Fix up special calling conventions */
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_down_read_failed
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_read_failed)
ENTRY(call_rwsem_down_write_failed)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
calll rwsem_down_write_failed
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_down_write_failed)
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake)
CFI_STARTPROC
decw %dx /* do nothing if still outstanding active readers */
jnz 1f
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
call rwsem_wake
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
1: ret
CFI_ENDPROC
ENDPROC(call_rwsem_wake)
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake)
/* Fix up special calling conventions */
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
- push %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx,0
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx,0
call rwsem_downgrade_wake
- pop %edx
- CFI_ADJUST_CFA_OFFSET -4
- pop %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
+ popl_cfi %ecx
ret
CFI_ENDPROC
ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index 650b11e00ecc..2930ae05d773 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -7,24 +7,6 @@
#include <linux/linkage.h>
-#define ARCH_TRACE_IRQS_ON \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_on; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
-#define ARCH_TRACE_IRQS_OFF \
- pushl %eax; \
- pushl %ecx; \
- pushl %edx; \
- call trace_hardirqs_off; \
- popl %edx; \
- popl %ecx; \
- popl %eax;
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in eax (arg1) */
.macro thunk_ra name,func
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index bf9a7d5a5428..782b082c9ff7 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -22,26 +22,6 @@
CFI_ENDPROC
.endm
- /* rdi: arg1 ... normal C conventions. rax is passed from C. */
- .macro thunk_retrax name,func
- .globl \name
-\name:
- CFI_STARTPROC
- SAVE_ARGS
- call \func
- jmp restore_norax
- CFI_ENDPROC
- .endm
-
-
- .section .sched.text, "ax"
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
- thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
- thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
- thunk rwsem_wake_thunk,rwsem_wake
- thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
-#endif
-
#ifdef CONFIG_TRACE_IRQFLAGS
/* put return address in rdi (arg1) */
.macro thunk_ra name,func
@@ -72,10 +52,3 @@ restore:
RESTORE_ARGS
ret
CFI_ENDPROC
-
- CFI_STARTPROC
- SAVE_ARGS
-restore_norax:
- RESTORE_ARGS 1
- ret
- CFI_ENDPROC
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 09df2f9a3d69..3e608edf9958 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o
obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
+obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435ed..0919c26820d4 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -26,9 +26,7 @@
#include <asm/apic.h>
#include <asm/amd_nb.h>
-static struct bootnode __initdata nodes[8];
static unsigned char __initdata nodeids[8];
-static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
static __init int find_northbridge(void)
{
@@ -51,7 +49,7 @@ static __init int find_northbridge(void)
return num;
}
- return -1;
+ return -ENOENT;
}
static __init void early_get_boot_cpu_id(void)
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void)
#endif
}
-int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
+int __init amd_numa_init(void)
{
- unsigned long start = PFN_PHYS(start_pfn);
- unsigned long end = PFN_PHYS(end_pfn);
+ unsigned long start = PFN_PHYS(0);
+ unsigned long end = PFN_PHYS(max_pfn);
unsigned numnodes;
unsigned long prevbase;
- int i, nb, found = 0;
+ int i, j, nb;
u32 nodeid, reg;
+ unsigned int bits, cores, apicid_base;
if (!early_pci_allowed())
- return -1;
+ return -EINVAL;
nb = find_northbridge();
if (nb < 0)
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
- return -1;
+ return -ENOENT;
pr_info("Number of physical nodes %d\n", numnodes);
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if ((base >> 8) & 3 || (limit >> 8) & 3) {
pr_err("Node %d using interleaving mode %lx/%lx\n",
nodeid, (base >> 8) & 3, (limit >> 8) & 3);
- return -1;
+ return -EINVAL;
}
- if (node_isset(nodeid, nodes_parsed)) {
+ if (node_isset(nodeid, numa_nodes_parsed)) {
pr_info("Node %d already present, skipping\n",
nodeid);
continue;
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
if (prevbase > base) {
pr_err("Node map not sorted %lx,%lx\n",
prevbase, base);
- return -1;
+ return -EINVAL;
}
pr_info("Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
- found++;
-
- nodes[nodeid].start = base;
- nodes[nodeid].end = limit;
-
prevbase = base;
-
- node_set(nodeid, nodes_parsed);
- }
-
- if (!found)
- return -1;
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
-void __init amd_get_nodes(struct bootnode *physnodes)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
+ numa_add_memblk(nodeid, base, limit);
+ node_set(nodeid, numa_nodes_parsed);
}
-}
-
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for (i = 0; i < 8; i++)
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- return ret;
-}
-/*
- * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
- * setup to represent the physical topology but reflect the emulated
- * environment. For each emulated node, the real node which it appears on is
- * found and a fake pxm to nid mapping is created which mirrors the actual
- * locality. node_distance() then represents the correct distances between
- * emulated nodes by using the fake acpi mappings to pxms.
- */
-void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base = 0;
- int i;
+ if (!nodes_weight(numa_nodes_parsed))
+ return -ENOENT;
+ /*
+ * We seem to have a valid NUMA configuration. Map apicids to nodes
+ * using the coreid bits from early_identify_cpu.
+ */
bits = boot_cpu_data.x86_coreid_bits;
cores = 1 << bits;
- early_get_boot_cpu_id();
- if (boot_cpu_physical_apicid > 0)
- apicid_base = boot_cpu_physical_apicid;
-
- for (i = 0; i < nr_nodes; i++) {
- int index;
- int nid;
- int j;
-
- nid = find_node_by_addr(nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
-
- index = nodeids[nid] << bits;
- if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
- for (j = apicid_base; j < cores + apicid_base; j++)
- fake_apicid_to_node[index + j] = i;
-#ifdef CONFIG_ACPI_NUMA
- __acpi_map_pxm_to_node(nid, i);
-#endif
- }
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __init amd_scan_nodes(void)
-{
- unsigned int bits;
- unsigned int cores;
- unsigned int apicid_base;
- int i;
-
- BUG_ON(nodes_empty(nodes_parsed));
- node_possible_map = nodes_parsed;
- memnode_shift = compute_hash_shift(nodes, 8, NULL);
- if (memnode_shift < 0) {
- pr_err("No NUMA node hash function found. Contact maintainer\n");
- return -1;
- }
- pr_info("Using node hash shift of %d\n", memnode_shift);
-
- /* use the coreid bits from early_identify_cpu */
- bits = boot_cpu_data.x86_coreid_bits;
- cores = (1<<bits);
apicid_base = 0;
+
/* get the APIC ID of the BSP early for systems with apicid lifting */
early_get_boot_cpu_id();
if (boot_cpu_physical_apicid > 0) {
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void)
apicid_base = boot_cpu_physical_apicid;
}
- for_each_node_mask(i, node_possible_map) {
- int j;
-
- memblock_x86_register_active_regions(i,
- nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
+ for_each_node_mask(i, numa_nodes_parsed)
for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(i << bits) + j] = i;
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
+ set_apicid_to_node((i << bits) + j, i);
- numa_init_array();
return 0;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a4..20e3f8702d1e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
-
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
pmd_t *ret;
+ /* the pgt_lock is only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
if (!ret)
break;
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -828,6 +827,13 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
unsigned long address, unsigned int fault)
{
if (fault & VM_FAULT_OOM) {
+ /* Kernel mode? Handle exceptions or die: */
+ if (!(error_code & PF_USER)) {
+ up_read(&current->mm->mmap_sem);
+ no_context(regs, error_code, address);
+ return;
+ }
+
out_of_memory(regs, error_code, address);
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe820..286d289b039b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -18,9 +18,9 @@
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-unsigned long __initdata e820_table_start;
-unsigned long __meminitdata e820_table_end;
-unsigned long __meminitdata e820_table_top;
+unsigned long __initdata pgt_buf_start;
+unsigned long __meminitdata pgt_buf_end;
+unsigned long __meminitdata pgt_buf_top;
int after_bootmem;
@@ -33,7 +33,7 @@ int direct_gbpages
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
- unsigned long puds, pmds, ptes, tables, start;
+ unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
-#ifdef CONFIG_X86_32
- start = 0x7000;
-#else
- start = 0x8000;
+ good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
+
+ base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
- e820_table_start = base >> PAGE_SHIFT;
- e820_table_end = e820_table_start;
- e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
+ pgt_buf_start = base >> PAGE_SHIFT;
+ pgt_buf_end = pgt_buf_start;
+ pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
- end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
+ end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
struct map_range {
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
load_cr3(swapper_pg_dir);
#endif
-#ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
-#endif
__flush_tlb_all();
- if (!after_bootmem && e820_table_end > e820_table_start)
- memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
- e820_table_end << PAGE_SHIFT, "PGTABLE");
+ if (!after_bootmem && pgt_buf_end > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_end << PAGE_SHIFT, "PGTABLE");
if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c821074b7f0b..73ad7ebd6e9c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false;
static __init void *alloc_low_page(void)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
- && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
- || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
+ && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start
+ || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) {
pte_t *newpte;
int i;
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..a08a62cb136e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
for (address = start; address <= end; address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
spinlock_t *pgt_lock;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ /* the pgt_lock is only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(pgt_lock);
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
}
@@ -314,7 +314,7 @@ void __init cleanup_highmap(void)
static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long pfn = e820_table_end++;
+ unsigned long pfn = pgt_buf_end++;
void *adr;
if (after_bootmem) {
@@ -324,7 +324,7 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
- if (pfn >= e820_table_top)
+ if (pfn >= pgt_buf_top)
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
+static __ref void *map_low_page(void *virt)
+{
+ void *adr;
+ unsigned long phys, left;
+
+ if (after_bootmem)
+ return virt;
+
+ phys = __pa(virt);
+ left = phys & (PAGE_SIZE - 1);
+ adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+ adr = (void *)(((unsigned long)adr) | left);
+
+ return adr;
+}
+
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
- early_iounmap(adr, PAGE_SIZE);
+ early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
- pgprot_t prot)
-{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
- return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pte_update(pmd, address,
+ pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ last_map_addr = phys_pte_init(pte, address,
end, prot);
+ unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask, pgprot_t prot)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long last_map_addr;
-
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
- __flush_tlb_all();
- return last_map_addr;
-}
-
-static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
- last_map_addr = phys_pmd_update(pud, addr, end,
+ pmd = map_low_page(pmd_offset(pud, 0));
+ last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
+ unmap_low_page(pmd);
+ __flush_tlb_all();
continue;
}
/*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
-{
- pud_t *pud;
-
- pud = (pud_t *)pgd_page_vaddr(*pgd);
-
- return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
next = end;
if (pgd_val(*pgd)) {
- last_map_addr = phys_pud_update(pgd, __pa(start),
+ pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
+ unmap_low_page(pud);
continue;
}
@@ -612,10 +603,9 @@ kernel_physical_mapping_init(unsigned long start,
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
- memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, 0, max_pfn);
}
#endif
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a38..9559d360fde7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
/*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
*/
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+ int rr, i;
+
+ rr = first_node(node_online_map);
+ for (i = 0; i < nr_cpu_ids; i++) {
+ if (early_cpu_to_node(i) != NUMA_NO_NODE)
+ continue;
+ numa_set_node(i, rr);
+ rr = next_node(rr, node_online_map);
+ if (rr == MAX_NUMNODES)
+ rr = first_node(node_online_map);
+ }
+}
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+ int cpu;
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+ BUG_ON(cpu_to_apicid == NULL);
+
+ for_each_possible_cpu(cpu) {
+ int node = numa_cpu_node(cpu);
+
+ if (node == NUMA_NO_NODE)
+ continue;
+ if (!node_online(node))
+ node = find_near_online_node(node);
+ numa_set_node(cpu, node);
+ }
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+int __cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!cpu_possible(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ if (node == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return NULL;
+ }
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
+
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f277..bde3906420df 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+ return apic->x86_32_numa_cpu_node(cpu);
+}
+
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid)
(ulong) node_remap_end_vaddr[nid]);
}
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
- int acpi, int k8)
+void __init initmem_init(void)
{
int nid;
long kva_target_pfn;
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
*/
get_memcfg_numa();
+ numa_init_array();
kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..9ec0f209a6a4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -13,31 +13,30 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
+#include <linux/acpi.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
-#include <asm/numa.h>
#include <asm/acpi.h>
#include <asm/amd_nb.h>
+#include "numa_internal.h"
+
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
-struct memnode memnode;
+nodemask_t numa_nodes_parsed __initdata;
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
+struct memnode memnode;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
-/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+static struct numa_meminfo numa_meminfo __initdata;
+
+static int numa_distance_cnt;
+static u8 *numa_distance;
/*
* Given a shift value, try to populate memnodemap[]
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
* 0 if memnodmap[] too small (of shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
-static int __init populate_memnodemap(const struct bootnode *nodes,
- int numnodes, int shift, int *nodeids)
+static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
{
unsigned long addr, end;
int i, res = -1;
memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
- for (i = 0; i < numnodes; i++) {
- addr = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ addr = mi->blk[i].start;
+ end = mi->blk[i].end;
if (addr >= end)
continue;
if ((end >> shift) >= memnodemapsize)
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes,
do {
if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
-
- if (!nodeids)
- memnodemap[addr >> shift] = i;
- else
- memnodemap[addr >> shift] = nodeids[i];
-
+ memnodemap[addr >> shift] = mi->blk[i].nid;
addr += (1UL << shift);
} while (addr < end);
res = 1;
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
- nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
+ nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void)
* The LSB of all start and end addresses in the node map is the value of the
* maximum possible shift.
*/
-static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
- int numnodes)
+static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
{
int i, nodes_used = 0;
unsigned long start, end;
unsigned long bitfield = 0, memtop = 0;
- for (i = 0; i < numnodes; i++) {
- start = nodes[i].start;
- end = nodes[i].end;
+ for (i = 0; i < mi->nr_blks; i++) {
+ start = mi->blk[i].start;
+ end = mi->blk[i].end;
if (start >= end)
continue;
bitfield |= start;
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
return i;
}
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
- int *nodeids)
+static int __init compute_hash_shift(const struct numa_meminfo *mi)
{
int shift;
- shift = extract_lsb_from_nodes(nodes, numnodes);
+ shift = extract_lsb_from_nodes(mi);
if (allocate_cachealigned_memnodemap())
return -1;
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
- if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
+ if (populate_memnodemap(mi, shift) != 1) {
printk(KERN_INFO "Your memory is not aligned you need to "
"rebuild your kernel with a bigger NODEMAPSIZE "
"shift=%d\n", shift);
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
return NULL;
}
+static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
+ struct numa_meminfo *mi)
+{
+ /* ignore zero length blks */
+ if (start == end)
+ return 0;
+
+ /* whine about and ignore invalid blks */
+ if (start > end || nid < 0 || nid >= MAX_NUMNODES) {
+ pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n",
+ nid, start, end);
+ return 0;
+ }
+
+ if (mi->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: too many memblk ranges\n");
+ return -EINVAL;
+ }
+
+ mi->blk[mi->nr_blks].start = start;
+ mi->blk[mi->nr_blks].end = end;
+ mi->blk[mi->nr_blks].nid = nid;
+ mi->nr_blks++;
+ return 0;
+}
+
+/**
+ * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo
+ * @idx: Index of memblk to remove
+ * @mi: numa_meminfo to remove memblk from
+ *
+ * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and
+ * decrementing @mi->nr_blks.
+ */
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
+{
+ mi->nr_blks--;
+ memmove(&mi->blk[idx], &mi->blk[idx + 1],
+ (mi->nr_blks - idx) * sizeof(mi->blk[0]));
+}
+
+/**
+ * numa_add_memblk - Add one numa_memblk to numa_meminfo
+ * @nid: NUMA node ID of the new memblk
+ * @start: Start address of the new memblk
+ * @end: End address of the new memblk
+ *
+ * Add a new memblk to the default numa_meminfo.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int __init numa_add_memblk(int nid, u64 start, u64 end)
+{
+ return numa_add_memblk_to(nid, start, end, &numa_meminfo);
+}
+
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
@@ -234,696 +282,386 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
node_set_online(nodeid);
}
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
+/**
+ * numa_cleanup_meminfo - Cleanup a numa_meminfo
+ * @mi: numa_meminfo to clean up
+ *
+ * Sanitize @mi by merging and removing unnecessary memblks. Also check for
+ * conflicts and clear unused memblks.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
*/
-void __init numa_init_array(void)
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
- int rr, i;
+ const u64 low = 0;
+ const u64 high = (u64)max_pfn << PAGE_SHIFT;
+ int i, j, k;
- rr = first_node(node_online_map);
- for (i = 0; i < nr_cpu_ids; i++) {
- if (early_cpu_to_node(i) != NUMA_NO_NODE)
- continue;
- numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- }
-}
-
-#ifdef CONFIG_NUMA_EMU
-/* Numa emulation */
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
-static char *cmdline __initdata;
+ for (i = 0; i < mi->nr_blks; i++) {
+ struct numa_memblk *bi = &mi->blk[i];
-void __init numa_emu_cmdline(char *str)
-{
- cmdline = str;
-}
+ /* make sure all blocks are inside the limits */
+ bi->start = max(bi->start, low);
+ bi->end = min(bi->end, high);
-static int __init setup_physnodes(unsigned long start, unsigned long end,
- int acpi, int amd)
-{
- int ret = 0;
- int i;
-
- memset(physnodes, 0, sizeof(physnodes));
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_get_nodes(physnodes, start, end);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_get_nodes(physnodes);
-#endif
- /*
- * Basic sanity checking on the physical node map: there may be errors
- * if the SRAT or AMD code incorrectly reported the topology or the mem=
- * kernel parameter is used.
- */
- for (i = 0; i < MAX_NUMNODES; i++) {
- if (physnodes[i].start == physnodes[i].end)
- continue;
- if (physnodes[i].start > end) {
- physnodes[i].end = physnodes[i].start;
- continue;
- }
- if (physnodes[i].end < start) {
- physnodes[i].start = physnodes[i].end;
+ /* and there's no empty block */
+ if (bi->start == bi->end) {
+ numa_remove_memblk_from(i--, mi);
continue;
}
- if (physnodes[i].start < start)
- physnodes[i].start = start;
- if (physnodes[i].end > end)
- physnodes[i].end = end;
- ret++;
- }
-
- /*
- * If no physical topology was detected, a single node is faked to cover
- * the entire address space.
- */
- if (!ret) {
- physnodes[ret].start = start;
- physnodes[ret].end = end;
- ret = 1;
- }
- return ret;
-}
-
-static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
-{
- int i;
-
- BUG_ON(acpi && amd);
-#ifdef CONFIG_ACPI_NUMA
- if (acpi)
- acpi_fake_nodes(nodes, nr_nodes);
-#endif
-#ifdef CONFIG_AMD_NUMA
- if (amd)
- amd_fake_nodes(nodes, nr_nodes);
-#endif
- if (!acpi && !amd)
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
-}
-
-/*
- * Setups up nid to range from addr to addr + size. If the end
- * boundary is greater than max_addr, then max_addr is used instead.
- * The return value is 0 if there is additional memory left for
- * allocation past addr and -1 otherwise. addr is adjusted to be at
- * the end of the node.
- */
-static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
-{
- int ret = 0;
- nodes[nid].start = *addr;
- *addr += size;
- if (*addr >= max_addr) {
- *addr = max_addr;
- ret = -1;
- }
- nodes[nid].end = *addr;
- node_set(nid, node_possible_map);
- printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
- nodes[nid].start, nodes[nid].end,
- (nodes[nid].end - nodes[nid].start) >> 20);
- return ret;
-}
-
-/*
- * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
- * to max_addr. The return value is the number of nodes allocated.
- */
-static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
-{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 size;
- int big;
- int ret = 0;
- int i;
-
- if (nr_nodes <= 0)
- return -1;
- if (nr_nodes > MAX_NUMNODES) {
- pr_info("numa=fake=%d too large, reducing to %d\n",
- nr_nodes, MAX_NUMNODES);
- nr_nodes = MAX_NUMNODES;
- }
-
- size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
- /*
- * Calculate the number of big nodes that can be allocated as a result
- * of consolidating the remainder.
- */
- big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
- FAKE_NODE_MIN_SIZE;
-
- size &= FAKE_NODE_MIN_HASH_MASK;
- if (!size) {
- pr_err("Not enough memory for each node. "
- "NUMA emulation disabled.\n");
- return -1;
- }
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
-
- /*
- * Continue to fill physical nodes with fake nodes until there is no
- * memory left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 end = physnodes[i].start + size;
- u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
-
- if (ret < big)
- end += FAKE_NODE_MIN_SIZE;
+ for (j = i + 1; j < mi->nr_blks; j++) {
+ struct numa_memblk *bj = &mi->blk[j];
+ unsigned long start, end;
/*
- * Continue to add memory to this fake node if its
- * non-reserved memory is less than the per-node size.
+ * See whether there are overlapping blocks. Whine
+ * about but allow overlaps of the same nid. They
+ * will be merged below.
*/
- while (end - physnodes[i].start -
- memblock_x86_hole_size(physnodes[i].start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > physnodes[i].end) {
- end = physnodes[i].end;
- break;
+ if (bi->end > bj->start && bi->start < bj->end) {
+ if (bi->nid != bj->nid) {
+ pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->nid, bj->start, bj->end);
+ return -EINVAL;
}
+ pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n",
+ bi->nid, bi->start, bi->end,
+ bj->start, bj->end);
}
/*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
+ * Join together blocks on the same node, holes
+ * between which don't overlap with memory on other
+ * nodes.
*/
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
-
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
-
- /*
- * Avoid allocating more nodes than requested, which can
- * happen as a result of rounding down each node's size
- * to FAKE_NODE_MIN_SIZE.
- */
- if (nodes_weight(physnode_mask) + ret >= nr_nodes)
- end = physnodes[i].end;
-
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
+ if (bi->nid != bj->nid)
+ continue;
+ start = max(min(bi->start, bj->start), low);
+ end = min(max(bi->end, bj->end), high);
+ for (k = 0; k < mi->nr_blks; k++) {
+ struct numa_memblk *bk = &mi->blk[k];
+
+ if (bi->nid == bk->nid)
+ continue;
+ if (start < bk->end && end > bk->start)
+ break;
+ }
+ if (k < mi->nr_blks)
+ continue;
+ printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+ bi->nid, bi->start, bi->end, bj->start, bj->end,
+ start, end);
+ bi->start = start;
+ bi->end = end;
+ numa_remove_memblk_from(j--, mi);
}
}
- return ret;
-}
-/*
- * Returns the end address of a node so that there is at least `size' amount of
- * non-reserved memory or `max_addr' is reached.
- */
-static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
-{
- u64 end = start + size;
-
- while (end - start - memblock_x86_hole_size(start, end) < size) {
- end += FAKE_NODE_MIN_SIZE;
- if (end > max_addr) {
- end = max_addr;
- break;
- }
+ for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
+ mi->blk[i].start = mi->blk[i].end = 0;
+ mi->blk[i].nid = NUMA_NO_NODE;
}
- return end;
+
+ return 0;
}
/*
- * Sets up fake nodes of `size' interleaved over physical nodes ranging from
- * `addr' to `max_addr'. The return value is the number of nodes allocated.
+ * Set nodes, which have memory in @mi, in *@nodemask.
*/
-static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
+static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
+ const struct numa_meminfo *mi)
{
- nodemask_t physnode_mask = NODE_MASK_NONE;
- u64 min_size;
- int ret = 0;
int i;
- if (!size)
- return -1;
- /*
- * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
- * increased accordingly if the requested size is too small. This
- * creates a uniform distribution of node sizes across the entire
- * machine (but not necessarily over physical nodes).
- */
- min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
- MAX_NUMNODES;
- min_size = max(min_size, FAKE_NODE_MIN_SIZE);
- if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
- min_size = (min_size + FAKE_NODE_MIN_SIZE) &
- FAKE_NODE_MIN_HASH_MASK;
- if (size < min_size) {
- pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
- size >> 20, min_size >> 20);
- size = min_size;
- }
- size &= FAKE_NODE_MIN_HASH_MASK;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- if (physnodes[i].start != physnodes[i].end)
- node_set(i, physnode_mask);
- /*
- * Fill physical nodes with fake nodes of size until there is no memory
- * left on any of them.
- */
- while (nodes_weight(physnode_mask)) {
- for_each_node_mask(i, physnode_mask) {
- u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
- u64 end;
-
- end = find_end_of_node(physnodes[i].start,
- physnodes[i].end, size);
- /*
- * If there won't be at least FAKE_NODE_MIN_SIZE of
- * non-reserved memory in ZONE_DMA32 for the next node,
- * this one must extend to the boundary.
- */
- if (end < dma32_end && dma32_end - end -
- memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
- end = dma32_end;
+ for (i = 0; i < ARRAY_SIZE(mi->blk); i++)
+ if (mi->blk[i].start != mi->blk[i].end &&
+ mi->blk[i].nid != NUMA_NO_NODE)
+ node_set(mi->blk[i].nid, *nodemask);
+}
- /*
- * If there won't be enough non-reserved memory for the
- * next node, this one must extend to the end of the
- * physical node.
- */
- if (physnodes[i].end - end -
- memblock_x86_hole_size(end, physnodes[i].end) < size)
- end = physnodes[i].end;
+/**
+ * numa_reset_distance - Reset NUMA distance table
+ *
+ * The current table is freed. The next numa_set_distance() call will
+ * create a new one.
+ */
+void __init numa_reset_distance(void)
+{
+ size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]);
- /*
- * Setup the fake node that will be allocated as bootmem
- * later. If setup_node_range() returns non-zero, there
- * is no more memory available on this physical node.
- */
- if (setup_node_range(ret++, &physnodes[i].start,
- end - physnodes[i].start,
- physnodes[i].end) < 0)
- node_clear(i, physnode_mask);
- }
- }
- return ret;
+ /* numa_distance could be 1LU marking allocation failure, test cnt */
+ if (numa_distance_cnt)
+ memblock_x86_free_range(__pa(numa_distance),
+ __pa(numa_distance) + size);
+ numa_distance_cnt = 0;
+ numa_distance = NULL; /* enable table creation */
}
-/*
- * Sets up the system RAM area from start_pfn to last_pfn according to the
- * numa=fake command-line option.
- */
-static int __init numa_emulation(unsigned long start_pfn,
- unsigned long last_pfn, int acpi, int amd)
+static int __init numa_alloc_distance(void)
{
- u64 addr = start_pfn << PAGE_SHIFT;
- u64 max_addr = last_pfn << PAGE_SHIFT;
- int num_nodes;
- int i;
+ nodemask_t nodes_parsed;
+ size_t size;
+ int i, j, cnt = 0;
+ u64 phys;
- /*
- * If the numa=fake command-line contains a 'M' or 'G', it represents
- * the fixed node size. Otherwise, if it is just a single number N,
- * split the system RAM into N fake nodes.
- */
- if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
- u64 size;
+ /* size the new table and allocate it */
+ nodes_parsed = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo);
- size = memparse(cmdline, &cmdline);
- num_nodes = split_nodes_size_interleave(addr, max_addr, size);
- } else {
- unsigned long n;
+ for_each_node_mask(i, nodes_parsed)
+ cnt = i;
+ cnt++;
+ size = cnt * cnt * sizeof(numa_distance[0]);
- n = simple_strtoul(cmdline, NULL, 0);
- num_nodes = split_nodes_interleave(addr, max_addr, n);
+ phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT,
+ size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate distance table!\n");
+ /* don't retry until explicitly reset */
+ numa_distance = (void *)1LU;
+ return -ENOMEM;
}
+ memblock_x86_reserve_range(phys, phys + size, "NUMA DIST");
- if (num_nodes < 0)
- return num_nodes;
- memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
- if (memnode_shift < 0) {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. NUMA emulation "
- "disabled.\n");
- return -1;
- }
+ numa_distance = __va(phys);
+ numa_distance_cnt = cnt;
+
+ /* fill with the default distances */
+ for (i = 0; i < cnt; i++)
+ for (j = 0; j < cnt; j++)
+ numa_distance[i * cnt + j] = i == j ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt);
- /*
- * We need to vacate all active ranges that may have been registered for
- * the e820 memory map.
- */
- remove_all_active_ranges();
- for_each_node_mask(i, node_possible_map) {
- memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
- nodes[i].end >> PAGE_SHIFT);
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
- setup_physnodes(addr, max_addr, acpi, amd);
- fake_physnodes(acpi, amd, num_nodes);
- numa_init_array();
return 0;
}
-#endif /* CONFIG_NUMA_EMU */
-void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
- int acpi, int amd)
+/**
+ * numa_set_distance - Set NUMA distance from one NUMA node to another
+ * @from: the 'from' node to set distance
+ * @to: the 'to' node to set distance
+ * @distance: NUMA distance
+ *
+ * Set the distance from node @from to @to to @distance. If distance table
+ * doesn't exist, one which is large enough to accommodate all the currently
+ * known nodes will be created.
+ *
+ * If such table cannot be allocated, a warning is printed and further
+ * calls are ignored until the distance table is reset with
+ * numa_reset_distance().
+ *
+ * If @from or @to is higher than the highest known node at the time of
+ * table creation or @distance doesn't make sense, the call is ignored.
+ * This is to allow simplification of specific NUMA config implementations.
+ */
+void __init numa_set_distance(int from, int to, int distance)
{
- int i;
-
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-
-#ifdef CONFIG_NUMA_EMU
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
+ if (!numa_distance && numa_alloc_distance() < 0)
return;
- setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
- acpi, amd);
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
-#ifdef CONFIG_ACPI_NUMA
- if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT))
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt) {
+ printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
+ }
-#ifdef CONFIG_AMD_NUMA
- if (!numa_off && amd && !amd_scan_nodes())
+ if ((u8)distance != distance ||
+ (from == to && distance != LOCAL_DISTANCE)) {
+ pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+ from, to, distance);
return;
- nodes_clear(node_possible_map);
- nodes_clear(node_online_map);
-#endif
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ }
- printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
- start_pfn << PAGE_SHIFT,
- last_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = 63;
- memnodemap = memnode.embedded_map;
- memnodemap[0] = 0;
- node_set_online(0);
- node_set(0, node_possible_map);
- for (i = 0; i < nr_cpu_ids; i++)
- numa_set_node(i, 0);
- memblock_x86_register_active_regions(0, start_pfn, last_pfn);
- setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
+ numa_distance[from * numa_distance_cnt + to] = distance;
}
-unsigned long __init numa_free_all_bootmem(void)
+int __node_distance(int from, int to)
{
- unsigned long pages = 0;
- int i;
+ if (from >= numa_distance_cnt || to >= numa_distance_cnt)
+ return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+ return numa_distance[from * numa_distance_cnt + to];
+}
+EXPORT_SYMBOL(__node_distance);
- for_each_online_node(i)
- pages += free_all_bootmem_node(NODE_DATA(i));
+/*
+ * Sanity check to catch more bad NUMA configurations (they are amazingly
+ * common). Make sure the nodes cover all memory.
+ */
+static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi)
+{
+ unsigned long numaram, e820ram;
+ int i;
- pages += free_all_memory_core_early(MAX_NUMNODES);
+ numaram = 0;
+ for (i = 0; i < mi->nr_blks; i++) {
+ unsigned long s = mi->blk[i].start >> PAGE_SHIFT;
+ unsigned long e = mi->blk[i].end >> PAGE_SHIFT;
+ numaram += e - s;
+ numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e);
+ if ((long)numaram < 0)
+ numaram = 0;
+ }
- return pages;
+ e820ram = max_pfn - (memblock_x86_hole_size(0,
+ max_pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+ /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+ if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) {
+ printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n",
+ (numaram << PAGE_SHIFT) >> 20,
+ (e820ram << PAGE_SHIFT) >> 20);
+ return false;
+ }
+ return true;
}
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
+static int __init numa_register_memblks(struct numa_meminfo *mi)
{
- int n, val;
- int min_val = INT_MAX;
- int best_node = -1;
+ int i, nid;
- for_each_online_node(n) {
- val = node_distance(node, n);
+ /* Account for nodes with cpus and no memory */
+ node_possible_map = numa_nodes_parsed;
+ numa_nodemask_from_meminfo(&node_possible_map, mi);
+ if (WARN_ON(nodes_empty(node_possible_map)))
+ return -EINVAL;
+
+ memnode_shift = compute_hash_shift(mi);
+ if (memnode_shift < 0) {
+ printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
+ return -EINVAL;
+ }
- if (val < min_val) {
- min_val = val;
- best_node = n;
+ for (i = 0; i < mi->nr_blks; i++)
+ memblock_x86_register_active_regions(mi->blk[i].nid,
+ mi->blk[i].start >> PAGE_SHIFT,
+ mi->blk[i].end >> PAGE_SHIFT);
+
+ /* for out of order entries */
+ sort_node_map();
+ if (!numa_meminfo_cover_memory(mi))
+ return -EINVAL;
+
+ /* Finally register nodes. */
+ for_each_node_mask(nid, node_possible_map) {
+ u64 start = (u64)max_pfn << PAGE_SHIFT;
+ u64 end = 0;
+
+ for (i = 0; i < mi->nr_blks; i++) {
+ if (nid != mi->blk[i].nid)
+ continue;
+ start = min(mi->blk[i].start, start);
+ end = max(mi->blk[i].end, end);
}
+
+ if (start < end)
+ setup_node_bootmem(nid, start, end);
}
- return best_node;
+ return 0;
}
-/*
- * Setup early cpu_to_node.
+/**
+ * dummy_numa_init - Fallback dummy NUMA init
*
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
+ * Used if there's no underlying NUMA architecture, NUMA initialization
+ * fails, or NUMA is disabled on the command line.
*
- * Called before the per_cpu areas are setup.
+ * Must online at least one node and add memory blocks that cover all
+ * allowed memory. This function must not fail.
*/
-void __init init_cpu_to_node(void)
+static int __init dummy_numa_init(void)
{
- int cpu;
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
- BUG_ON(cpu_to_apicid == NULL);
+ printk(KERN_INFO "%s\n",
+ numa_off ? "NUMA turned off" : "No NUMA configuration found");
+ printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
+ 0LU, max_pfn << PAGE_SHIFT);
- for_each_possible_cpu(cpu) {
- int node;
- u16 apicid = cpu_to_apicid[cpu];
+ node_set(0, numa_nodes_parsed);
+ numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT);
- if (apicid == BAD_APICID)
- continue;
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- node = find_near_online_node(node);
- numa_set_node(cpu, node);
- }
+ return 0;
}
-#endif
-
-void __cpuinit numa_set_node(int cpu, int node)
+static int __init numa_init(int (*init_func)(void))
{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
- /* early setting, no percpu area yet */
- if (cpu_to_node_map) {
- cpu_to_node_map[cpu] = node;
- return;
- }
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
- printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
- dump_stack();
- return;
- }
-#endif
- per_cpu(x86_cpu_to_node_map, cpu) = node;
+ int i;
+ int ret;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
-}
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, NUMA_NO_NODE);
-void __cpuinit numa_clear_node(int cpu)
-{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
+ nodes_clear(numa_nodes_parsed);
+ nodes_clear(node_possible_map);
+ nodes_clear(node_online_map);
+ memset(&numa_meminfo, 0, sizeof(numa_meminfo));
+ remove_all_active_ranges();
+ numa_reset_distance();
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+ ret = init_func();
+ if (ret < 0)
+ return ret;
+ ret = numa_cleanup_meminfo(&numa_meminfo);
+ if (ret < 0)
+ return ret;
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
+ numa_emulation(&numa_meminfo, numa_distance_cnt);
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
-void __cpuinit numa_add_cpu(int cpu)
-{
- unsigned long addr;
- u16 apicid;
- int physnid;
- int nid = NUMA_NO_NODE;
+ ret = numa_register_memblks(&numa_meminfo);
+ if (ret < 0)
+ return ret;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
- if (nid == NUMA_NO_NODE)
- nid = early_cpu_to_node(cpu);
- BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
-
- /*
- * Use the starting address of the emulated node to find which physical
- * node it is allocated on.
- */
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- break;
+ for (i = 0; i < nr_cpu_ids; i++) {
+ int nid = early_cpu_to_node(i);
- /*
- * Map the cpu to each emulated node that is allocated on the physical
- * node of the cpu's apic id.
- */
- for_each_online_node(nid) {
- addr = node_start_pfn(nid) << PAGE_SHIFT;
- if (addr >= physnodes[physnid].start &&
- addr < physnodes[physnid].end)
- cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+ if (nid == NUMA_NO_NODE)
+ continue;
+ if (!node_online(nid))
+ numa_clear_node(i);
}
+ numa_init_array();
+ return 0;
}
-void __cpuinit numa_remove_cpu(int cpu)
+void __init initmem_init(void)
{
- int i;
+ int ret;
- for_each_online_node(i)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
-}
-#endif /* !CONFIG_NUMA_EMU */
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
- char buf[64];
-
- mask = node_to_cpumask_map[node];
- if (!mask) {
- pr_err("node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
- return NULL;
+ if (!numa_off) {
+#ifdef CONFIG_ACPI_NUMA
+ ret = numa_init(x86_acpi_numa_init);
+ if (!ret)
+ return;
+#endif
+#ifdef CONFIG_AMD_NUMA
+ ret = numa_init(amd_numa_init);
+ if (!ret)
+ return;
+#endif
}
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu",
- cpu, node, buf);
- return mask;
+ numa_init(dummy_numa_init);
}
-/*
- * --------- debug versions of the numa functions ---------
- */
-#ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- struct cpumask *mask;
-
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
-}
-#else
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
+unsigned long __init numa_free_all_bootmem(void)
{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
+ unsigned long pages = 0;
int i;
- for_each_online_node(i) {
- unsigned long addr;
-
- addr = node_start_pfn(i) << PAGE_SHIFT;
- if (addr < physnodes[node].start ||
- addr >= physnodes[node].end)
- continue;
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
- }
-}
-#endif /* CONFIG_NUMA_EMU */
+ for_each_online_node(i)
+ pages += free_all_bootmem_node(NODE_DATA(i));
-void __cpuinit numa_add_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 1);
-}
+ pages += free_all_memory_core_early(MAX_NUMNODES);
-void __cpuinit numa_remove_cpu(int cpu)
-{
- numa_set_cpumask(cpu, 0);
+ return pages;
}
-int __cpu_to_node(int cpu)
+int __cpuinit numa_cpu_node(int cpu)
{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!cpu_possible(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
}
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
new file mode 100644
index 000000000000..ad091e4cff17
--- /dev/null
+++ b/arch/x86/mm/numa_emulation.c
@@ -0,0 +1,494 @@
+/*
+ * NUMA emulation
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/topology.h>
+#include <linux/memblock.h>
+#include <asm/dma.h>
+
+#include "numa_internal.h"
+
+static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata;
+static char *emu_cmdline __initdata;
+
+void __init numa_emu_cmdline(char *str)
+{
+ emu_cmdline = str;
+}
+
+static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
+{
+ int i;
+
+ for (i = 0; i < mi->nr_blks; i++)
+ if (mi->blk[i].nid == nid)
+ return i;
+ return -ENOENT;
+}
+
+/*
+ * Adds a memblk for emulated node @nid to @ei, carved from the first @size
+ * bytes of @pi's memblk @phys_blk. Returns -errno on failure, 0 otherwise.
+ */
+static int __init emu_setup_memblk(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ int nid, int phys_blk, u64 size)
+{
+ struct numa_memblk *eb = &ei->blk[ei->nr_blks];
+ struct numa_memblk *pb = &pi->blk[phys_blk];
+
+ if (ei->nr_blks >= NR_NODE_MEMBLKS) {
+ pr_err("NUMA: Too many emulated memblks, failing emulation\n");
+ return -EINVAL;
+ }
+
+ ei->nr_blks++;
+ eb->start = pb->start;
+ eb->end = pb->start + size;
+ eb->nid = nid;
+
+ if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
+ emu_nid_to_phys[nid] = pb->nid;
+
+ pb->start += size;
+ if (pb->start >= pb->end) {
+ WARN_ON_ONCE(pb->start > pb->end);
+ numa_remove_memblk_from(phys_blk, pi);
+ }
+
+ printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
+ eb->start, eb->end, (eb->end - eb->start) >> 20);
+ return 0;
+}
+
+/*
+ * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr
+ * to max_addr. Returns 0 on success and a negative value on failure.
+ */
+static int __init split_nodes_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, int nr_nodes)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 size;
+ int big;
+ int nid = 0;
+ int i, ret;
+
+ if (nr_nodes <= 0)
+ return -1;
+ if (nr_nodes > MAX_NUMNODES) {
+ pr_info("numa=fake=%d too large, reducing to %d\n",
+ nr_nodes, MAX_NUMNODES);
+ nr_nodes = MAX_NUMNODES;
+ }
+
+ size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes;
+ /*
+ * Calculate the number of big nodes that can be allocated as a result
+ * of consolidating the remainder.
+ */
+ big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
+ FAKE_NODE_MIN_SIZE;
+
+ size &= FAKE_NODE_MIN_HASH_MASK;
+ if (!size) {
+ pr_err("Not enough memory for each node. "
+ "NUMA emulation disabled.\n");
+ return -1;
+ }
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Continue to fill physical nodes with fake nodes until there is no
+ * memory left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+ end = start + size;
+
+ if (nid < big)
+ end += FAKE_NODE_MIN_SIZE;
+
+ /*
+ * Continue to add memory to this fake node if its
+ * non-reserved memory is less than the per-node size.
+ */
+ while (end - start -
+ memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > limit) {
+ end = limit;
+ break;
+ }
+ }
+
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Returns the end address of a node so that there is at least `size' amount of
+ * non-reserved memory or `max_addr' is reached.
+ */
+static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
+{
+ u64 end = start + size;
+
+ while (end - start - memblock_x86_hole_size(start, end) < size) {
+ end += FAKE_NODE_MIN_SIZE;
+ if (end > max_addr) {
+ end = max_addr;
+ break;
+ }
+ }
+ return end;
+}
+
+/*
+ * Sets up fake nodes of `size' interleaved over physical nodes ranging from
+ * `addr' to `max_addr'. Returns 0 on success and a negative value on failure.
+ */
+static int __init split_nodes_size_interleave(struct numa_meminfo *ei,
+ struct numa_meminfo *pi,
+ u64 addr, u64 max_addr, u64 size)
+{
+ nodemask_t physnode_mask = NODE_MASK_NONE;
+ u64 min_size;
+ int nid = 0;
+ int i, ret;
+
+ if (!size)
+ return -1;
+ /*
+ * The limit on emulated nodes is MAX_NUMNODES, so the size per node is
+ * increased accordingly if the requested size is too small. This
+ * creates a uniform distribution of node sizes across the entire
+ * machine (but not necessarily over physical nodes).
+ */
+ min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
+ MAX_NUMNODES;
+ min_size = max(min_size, FAKE_NODE_MIN_SIZE);
+ if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
+ min_size = (min_size + FAKE_NODE_MIN_SIZE) &
+ FAKE_NODE_MIN_HASH_MASK;
+ if (size < min_size) {
+ pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
+ size >> 20, min_size >> 20);
+ size = min_size;
+ }
+ size &= FAKE_NODE_MIN_HASH_MASK;
+
+ for (i = 0; i < pi->nr_blks; i++)
+ node_set(pi->blk[i].nid, physnode_mask);
+
+ /*
+ * Fill physical nodes with fake nodes of size until there is no memory
+ * left on any of them.
+ */
+ while (nodes_weight(physnode_mask)) {
+ for_each_node_mask(i, physnode_mask) {
+ u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
+ u64 start, limit, end;
+ int phys_blk;
+
+ phys_blk = emu_find_memblk_by_nid(i, pi);
+ if (phys_blk < 0) {
+ node_clear(i, physnode_mask);
+ continue;
+ }
+ start = pi->blk[phys_blk].start;
+ limit = pi->blk[phys_blk].end;
+
+ end = find_end_of_node(start, limit, size);
+ /*
+ * If there won't be at least FAKE_NODE_MIN_SIZE of
+ * non-reserved memory in ZONE_DMA32 for the next node,
+ * this one must extend to the boundary.
+ */
+ if (end < dma32_end && dma32_end - end -
+ memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
+ end = dma32_end;
+
+ /*
+ * If there won't be enough non-reserved memory for the
+ * next node, this one must extend to the end of the
+ * physical node.
+ */
+ if (limit - end -
+ memblock_x86_hole_size(end, limit) < size)
+ end = limit;
+
+ ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES,
+ phys_blk,
+ min(end, limit) - start);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/**
+ * numa_emulation - Emulate NUMA nodes
+ * @numa_meminfo: NUMA configuration to massage
+ * @numa_dist_cnt: The size of the physical NUMA distance table
+ *
+ * Emulate NUMA nodes according to the numa=fake kernel parameter.
+ * @numa_meminfo contains the physical memory configuration and is modified
+ * to reflect the emulated configuration on success. @numa_dist_cnt is
+ * used to determine the size of the physical distance table.
+ *
+ * On success, the following modifications are made.
+ *
+ * - @numa_meminfo is updated to reflect the emulated nodes.
+ *
+ * - __apicid_to_node[] is updated such that APIC IDs are mapped to the
+ * emulated nodes.
+ *
+ * - NUMA distance table is rebuilt to represent distances between emulated
+ * nodes. The distances are determined considering how emulated nodes
+ * are mapped to physical nodes and match the actual distances.
+ *
+ * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical
+ * nodes. This is used by numa_add_cpu() and numa_remove_cpu().
+ *
+ * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with
+ * identity mapping and no other modification is made.
+ */
+void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
+{
+ static struct numa_meminfo ei __initdata;
+ static struct numa_meminfo pi __initdata;
+ const u64 max_addr = max_pfn << PAGE_SHIFT;
+ u8 *phys_dist = NULL;
+ size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]);
+ int max_emu_nid, dfl_phys_nid;
+ int i, j, ret;
+
+ if (!emu_cmdline)
+ goto no_emu;
+
+ memset(&ei, 0, sizeof(ei));
+ pi = *numa_meminfo;
+
+ for (i = 0; i < MAX_NUMNODES; i++)
+ emu_nid_to_phys[i] = NUMA_NO_NODE;
+
+ /*
+ * If the numa=fake command-line contains a 'M' or 'G', it represents
+ * the fixed node size. Otherwise, if it is just a single number N,
+ * split the system RAM into N fake nodes.
+ */
+ if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) {
+ u64 size;
+
+ size = memparse(emu_cmdline, &emu_cmdline);
+ ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size);
+ } else {
+ unsigned long n;
+
+ n = simple_strtoul(emu_cmdline, NULL, 0);
+ ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n);
+ }
+
+ if (ret < 0)
+ goto no_emu;
+
+ if (numa_cleanup_meminfo(&ei) < 0) {
+ pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+ goto no_emu;
+ }
+
+ /* copy the physical distance table */
+ if (numa_dist_cnt) {
+ u64 phys;
+
+ phys = memblock_find_in_range(0,
+ (u64)max_pfn_mapped << PAGE_SHIFT,
+ phys_size, PAGE_SIZE);
+ if (phys == MEMBLOCK_ERROR) {
+ pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+ goto no_emu;
+ }
+ memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST");
+ phys_dist = __va(phys);
+
+ for (i = 0; i < numa_dist_cnt; i++)
+ for (j = 0; j < numa_dist_cnt; j++)
+ phys_dist[i * numa_dist_cnt + j] =
+ node_distance(i, j);
+ }
+
+ /*
+ * Determine the max emulated nid and the default phys nid to use
+ * for unmapped nodes.
+ */
+ max_emu_nid = 0;
+ dfl_phys_nid = NUMA_NO_NODE;
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) {
+ if (emu_nid_to_phys[i] != NUMA_NO_NODE) {
+ max_emu_nid = i;
+ if (dfl_phys_nid == NUMA_NO_NODE)
+ dfl_phys_nid = emu_nid_to_phys[i];
+ }
+ }
+ if (dfl_phys_nid == NUMA_NO_NODE) {
+ pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n");
+ goto no_emu;
+ }
+
+ /* commit */
+ *numa_meminfo = ei;
+
+ /*
+ * Transform __apicid_to_node table to use emulated nids by
+ * reverse-mapping phys_nid. The maps should always exist but fall
+ * back to zero just in case.
+ */
+ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) {
+ if (__apicid_to_node[i] == NUMA_NO_NODE)
+ continue;
+ for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++)
+ if (__apicid_to_node[i] == emu_nid_to_phys[j])
+ break;
+ __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0;
+ }
+
+ /* make sure all emulated nodes are mapped to a physical node */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ if (emu_nid_to_phys[i] == NUMA_NO_NODE)
+ emu_nid_to_phys[i] = dfl_phys_nid;
+
+ /* transform distance table */
+ numa_reset_distance();
+ for (i = 0; i < max_emu_nid + 1; i++) {
+ for (j = 0; j < max_emu_nid + 1; j++) {
+ int physi = emu_nid_to_phys[i];
+ int physj = emu_nid_to_phys[j];
+ int dist;
+
+ if (physi >= numa_dist_cnt || physj >= numa_dist_cnt)
+ dist = physi == physj ?
+ LOCAL_DISTANCE : REMOTE_DISTANCE;
+ else
+ dist = phys_dist[physi * numa_dist_cnt + physj];
+
+ numa_set_distance(i, j, dist);
+ }
+ }
+
+ /* free the copied physical distance table */
+ if (phys_dist)
+ memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size);
+ return;
+
+no_emu:
+ /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */
+ for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++)
+ emu_nid_to_phys[i] = i;
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+void __cpuinit numa_add_cpu(int cpu)
+{
+ int physnid, nid;
+
+ nid = early_cpu_to_node(cpu);
+ BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
+
+ physnid = emu_nid_to_phys[nid];
+
+ /*
+ * Map the cpu to each emulated node that is allocated on the physical
+ * node of the cpu's apic id.
+ */
+ for_each_online_node(nid)
+ if (emu_nid_to_phys[nid] == physnid)
+ cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ int i;
+
+ for_each_online_node(i)
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
+}
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+ int nid, physnid, i;
+
+ nid = early_cpu_to_node(cpu);
+ if (nid == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return;
+ }
+
+ physnid = emu_nid_to_phys[nid];
+
+ for_each_online_node(i) {
+ if (emu_nid_to_phys[nid] != physnid)
+ continue;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+ }
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h
new file mode 100644
index 000000000000..ef2d97377d7c
--- /dev/null
+++ b/arch/x86/mm/numa_internal.h
@@ -0,0 +1,31 @@
+#ifndef __X86_MM_NUMA_INTERNAL_H
+#define __X86_MM_NUMA_INTERNAL_H
+
+#include <linux/types.h>
+#include <asm/numa.h>
+
+struct numa_memblk {
+ u64 start;
+ u64 end;
+ int nid;
+};
+
+struct numa_meminfo {
+ int nr_blks;
+ struct numa_memblk blk[NR_NODE_MEMBLKS];
+};
+
+void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi);
+int __init numa_cleanup_meminfo(struct numa_meminfo *mi);
+void __init numa_reset_distance(void);
+
+#ifdef CONFIG_NUMA_EMU
+void __init numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt);
+#else
+static inline void numa_emulation(struct numa_meminfo *numa_meminfo,
+ int numa_dist_cnt)
+{ }
+#endif
+
+#endif /* __X86_MM_NUMA_INTERNAL_H */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3c..90825f2eb0f4 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
void update_page_count(int level, unsigned long pages)
{
- unsigned long flags;
-
/* Protect against CPA */
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
direct_pages_count[level] += pages;
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+ unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
if (cpa->force_split)
return 1;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
}
out_unlock:
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return do_split;
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
- unsigned long flags, pfn, pfninc = 1;
+ unsigned long pfn, pfninc = 1;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -591,7 +589,7 @@ out_unlock:
*/
if (base)
__free_page(base);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96d..0113d19c8aa6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
static void pgd_dtor(pgd_t *pgd)
{
- unsigned long flags; /* can be called from interrupt context */
-
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_list_del(pgd);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- unsigned long flags;
pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
* respect to anything walking the pgd_list, so that they
* never see a partially populated pgd.
*/
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
return pgd;
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051d..48651c6f657d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
int acpi_numa __initdata;
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
num_memory_chunks);
- for (i = 0; i < MAX_APICID; i++)
- apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1daa..8e9d3394f6d4 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -26,88 +26,34 @@
int acpi_numa __initdata;
-static struct acpi_table_slit *acpi_slit;
-
-static nodemask_t nodes_parsed __initdata;
-static nodemask_t cpu_nodes_parsed __initdata;
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode nodes_add[MAX_NUMNODES];
-static int num_node_memblks __initdata;
-static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
-static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
-
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
}
-static __init int conflicting_memblks(unsigned long start, unsigned long end)
-{
- int i;
- for (i = 0; i < num_node_memblks; i++) {
- struct bootnode *nd = &node_memblk_range[i];
- if (nd->start == nd->end)
- continue;
- if (nd->end > start && nd->start < end)
- return memblk_nodeid[i];
- if (nd->end == end && nd->start == start)
- return memblk_nodeid[i];
- }
- return -1;
-}
-
-static __init void cutoff_node(int i, unsigned long start, unsigned long end)
-{
- struct bootnode *nd = &nodes[i];
-
- if (nd->start < start) {
- nd->start = start;
- if (nd->end < nd->start)
- nd->start = nd->end;
- }
- if (nd->end > end) {
- nd->end = end;
- if (nd->start > nd->end)
- nd->start = nd->end;
- }
-}
-
static __init void bad_srat(void)
{
- int i;
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
- for (i = 0; i < MAX_NUMNODES; i++) {
- nodes[i].start = nodes[i].end = 0;
- nodes_add[i].start = nodes_add[i].end = 0;
- }
- remove_all_active_ranges();
+ memset(nodes_add, 0, sizeof(nodes_add));
}
static __init inline int srat_disabled(void)
{
- return numa_off || acpi_numa < 0;
+ return acpi_numa < 0;
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
- unsigned length;
- unsigned long phys;
-
- length = slit->header.length;
- phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
- PAGE_SIZE);
-
- if (phys == MEMBLOCK_ERROR)
- panic(" Can not save slit!\n");
+ int i, j;
- acpi_slit = __va(phys);
- memcpy(acpi_slit, slit, length);
- memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
+ for (i = 0; i < slit->locality_count; i++)
+ for (j = 0; j < slit->locality_count; j++)
+ numa_set_distance(pxm_to_node(i), pxm_to_node(j),
+ slit->entry[slit->locality_count * i + j]);
}
/* Callback for Proximity Domain -> x2APIC mapping */
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
return;
}
- apicid_to_node[apic_id] = node;
- node_set(node, cpu_nodes_parsed);
+ set_apicid_to_node(apic_id, node);
+ node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
}
if (changed) {
- node_set(node, cpu_nodes_parsed);
+ node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
nd->start, nd->end);
}
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end)
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
- struct bootnode *nd, oldnode;
unsigned long start, end;
int node, pxm;
- int i;
if (srat_disabled())
return;
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
bad_srat();
return;
}
- i = conflicting_memblks(start, end);
- if (i == node) {
- printk(KERN_WARNING
- "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
- pxm, start, end, nodes[i].start, nodes[i].end);
- } else if (i >= 0) {
- printk(KERN_ERR
- "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
- pxm, start, end, node_to_pxm(i),
- nodes[i].start, nodes[i].end);
+
+ if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
return;
}
- nd = &nodes[node];
- oldnode = *nd;
- if (!node_test_and_set(node, nodes_parsed)) {
- nd->start = start;
- nd->end = end;
- } else {
- if (start < nd->start)
- nd->start = start;
- if (nd->end < end)
- nd->end = end;
- }
printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
start, end);
- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+ if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)
update_nodes_add(node, start, end);
- /* restore nodes[node] */
- *nd = oldnode;
- if ((nd->start | nd->end) == 0)
- node_clear(node, nodes_parsed);
- }
-
- node_memblk_range[num_node_memblks].start = start;
- node_memblk_range[num_node_memblks].end = end;
- memblk_nodeid[num_node_memblks] = node;
- num_node_memblks++;
-}
-
-/* Sanity check to catch more bad SRATs (they are amazingly common).
- Make sure the PXMs cover all memory. */
-static int __init nodes_cover_memory(const struct bootnode *nodes)
-{
- int i;
- unsigned long pxmram, e820ram;
-
- pxmram = 0;
- for_each_node_mask(i, nodes_parsed) {
- unsigned long s = nodes[i].start >> PAGE_SHIFT;
- unsigned long e = nodes[i].end >> PAGE_SHIFT;
- pxmram += e - s;
- pxmram -= __absent_pages_in_range(i, s, e);
- if ((long)pxmram < 0)
- pxmram = 0;
- }
-
- e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
- /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
- if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
- printk(KERN_ERR
- "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
- (pxmram << PAGE_SHIFT) >> 20,
- (e820ram << PAGE_SHIFT) >> 20);
- return 0;
- }
- return 1;
}
void __init acpi_numa_arch_fixup(void) {}
-#ifdef CONFIG_NUMA_EMU
-void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
- unsigned long end)
-{
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- cutoff_node(i, start, end);
- physnodes[i].start = nodes[i].start;
- physnodes[i].end = nodes[i].end;
- }
-}
-#endif /* CONFIG_NUMA_EMU */
-
-/* Use the information discovered above to actually set up the nodes. */
-int __init acpi_scan_nodes(unsigned long start, unsigned long end)
+int __init x86_acpi_numa_init(void)
{
- int i;
-
- if (acpi_numa <= 0)
- return -1;
-
- /* First clean up the node list */
- for (i = 0; i < MAX_NUMNODES; i++)
- cutoff_node(i, start, end);
-
- /*
- * Join together blocks on the same node, holes between
- * which don't overlap with memory on other nodes.
- */
- for (i = 0; i < num_node_memblks; ++i) {
- int j, k;
-
- for (j = i + 1; j < num_node_memblks; ++j) {
- unsigned long start, end;
-
- if (memblk_nodeid[i] != memblk_nodeid[j])
- continue;
- start = min(node_memblk_range[i].end,
- node_memblk_range[j].end);
- end = max(node_memblk_range[i].start,
- node_memblk_range[j].start);
- for (k = 0; k < num_node_memblks; ++k) {
- if (memblk_nodeid[i] == memblk_nodeid[k])
- continue;
- if (start < node_memblk_range[k].end &&
- end > node_memblk_range[k].start)
- break;
- }
- if (k < num_node_memblks)
- continue;
- start = min(node_memblk_range[i].start,
- node_memblk_range[j].start);
- end = max(node_memblk_range[i].end,
- node_memblk_range[j].end);
- printk(KERN_INFO "SRAT: Node %d "
- "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
- memblk_nodeid[i],
- node_memblk_range[i].start,
- node_memblk_range[i].end,
- node_memblk_range[j].start,
- node_memblk_range[j].end,
- start, end);
- node_memblk_range[i].start = start;
- node_memblk_range[i].end = end;
- k = --num_node_memblks - j;
- memmove(memblk_nodeid + j, memblk_nodeid + j+1,
- k * sizeof(*memblk_nodeid));
- memmove(node_memblk_range + j, node_memblk_range + j+1,
- k * sizeof(*node_memblk_range));
- --j;
- }
- }
-
- memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
- memblk_nodeid);
- if (memnode_shift < 0) {
- printk(KERN_ERR
- "SRAT: No NUMA node hash function found. Contact maintainer\n");
- bad_srat();
- return -1;
- }
-
- for (i = 0; i < num_node_memblks; i++)
- memblock_x86_register_active_regions(memblk_nodeid[i],
- node_memblk_range[i].start >> PAGE_SHIFT,
- node_memblk_range[i].end >> PAGE_SHIFT);
-
- /* for out of order entries in SRAT */
- sort_node_map();
- if (!nodes_cover_memory(nodes)) {
- bad_srat();
- return -1;
- }
+ int ret;
- /* Account for nodes with cpus and no memory */
- nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
-
- /* Finally register nodes */
- for_each_node_mask(i, node_possible_map)
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- /* Try again in case setup_node_bootmem missed one due
- to missing bootmem */
- for_each_node_mask(i, node_possible_map)
- if (!node_online(i))
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-
- for (i = 0; i < nr_cpu_ids; i++) {
- int node = early_cpu_to_node(i);
-
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- numa_clear_node(i);
- }
- numa_init_array();
- return 0;
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
- [0 ... MAX_NUMNODES-1] = PXM_INVAL
-};
-static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-static int __init find_node_by_addr(unsigned long addr)
-{
- int ret = NUMA_NO_NODE;
- int i;
-
- for_each_node_mask(i, nodes_parsed) {
- /*
- * Find the real node that this emulated node appears on. For
- * the sake of simplicity, we only use a real node's starting
- * address to determine which emulated node it appears on.
- */
- if (addr >= nodes[i].start && addr < nodes[i].end) {
- ret = i;
- break;
- }
- }
- return ret;
+ ret = acpi_numa_init();
+ if (ret < 0)
+ return ret;
+ return srat_disabled() ? -EINVAL : 0;
}
-/*
- * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
- * mappings that respect the real ACPI topology but reflect our emulated
- * environment. For each emulated node, we find which real node it appears on
- * and create PXM to NID mappings for those fake nodes which mirror that
- * locality. SLIT will now represent the correct distances between emulated
- * nodes as a result of the real topology.
- */
-void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
-{
- int i, j;
-
- for (i = 0; i < num_nodes; i++) {
- int nid, pxm;
-
- nid = find_node_by_addr(fake_nodes[i].start);
- if (nid == NUMA_NO_NODE)
- continue;
- pxm = node_to_pxm(nid);
- if (pxm == PXM_INVAL)
- continue;
- fake_node_to_pxm_map[i] = pxm;
- /*
- * For each apicid_to_node mapping that exists for this real
- * node, it must now point to the fake node ID.
- */
- for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid &&
- fake_apicid_to_node[j] == NUMA_NO_NODE)
- fake_apicid_to_node[j] = i;
- }
-
- /*
- * If there are apicid-to-node mappings for physical nodes that do not
- * have a corresponding emulated node, it should default to a guaranteed
- * value.
- */
- for (i = 0; i < MAX_LOCAL_APIC; i++)
- if (apicid_to_node[i] != NUMA_NO_NODE &&
- fake_apicid_to_node[i] == NUMA_NO_NODE)
- fake_apicid_to_node[i] = 0;
-
- for (i = 0; i < num_nodes; i++)
- __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
-
- nodes_clear(nodes_parsed);
- for (i = 0; i < num_nodes; i++)
- if (fake_nodes[i].start != fake_nodes[i].end)
- node_set(i, nodes_parsed);
-}
-
-static int null_slit_node_compare(int a, int b)
-{
- return node_to_pxm(a) == node_to_pxm(b);
-}
-#else
-static int null_slit_node_compare(int a, int b)
-{
- return a == b;
-}
-#endif /* CONFIG_NUMA_EMU */
-
-int __node_distance(int a, int b)
-{
- int index;
-
- if (!acpi_slit)
- return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
- REMOTE_DISTANCE;
- index = acpi_slit->locality_count * node_to_pxm(a);
- return acpi_slit->entry[index + node_to_pxm(b)];
-}
-
-EXPORT_SYMBOL(__node_distance);
-
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
int memory_add_physaddr_to_nid(u64 start)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8f..d6c0418c3e47 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
sender = this_cpu_read(tlb_vector_offset);
f = &flush_state[sender];
- /*
- * Could avoid this lock when
- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- * probably not worth checking this for a cache-hot lock.
- */
- raw_spin_lock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = NULL;
f->flush_va = 0;
- raw_spin_unlock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_unlock(&f->tlbstate_lock);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
@@ -211,11 +208,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
if (is_uv_system()) {
unsigned int cpu;
- cpu = get_cpu();
+ cpu = smp_processor_id();
cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
if (cpumask)
flush_tlb_others_ipi(cpumask, mm, va);
- put_cpu();
return;
}
flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a7..026e4931d162 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
-static void enable_pci_io_ecs(void *unused)
+static void __cpuinit enable_pci_io_ecs(void *unused)
{
u64 reg;
rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c
index 85b68ef5e809..67858be4b52b 100644
--- a/arch/x86/pci/ce4100.c
+++ b/arch/x86/pci/ce4100.c
@@ -34,6 +34,7 @@
#include <linux/pci.h>
#include <linux/init.h>
+#include <asm/ce4100.h>
#include <asm/pci_x86.h>
struct sim_reg {
@@ -254,7 +255,7 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value)
static int ce4100_conf_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
- int i, retval = 1;
+ int i;
if (bus == 1) {
for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) {
@@ -306,10 +307,10 @@ struct pci_raw_ops ce4100_pci_conf = {
.write = ce4100_conf_write,
};
-static int __init ce4100_pci_init(void)
+int __init ce4100_pci_init(void)
{
init_sim_regs();
raw_pci_ops = &ce4100_pci_conf;
- return 0;
+ /* Tell the caller that it should invoke pci_legacy_init() */
+ return 1;
}
-subsys_initcall(ce4100_pci_init);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 25cd4a07d09f..8c4085a95ef1 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -20,7 +20,8 @@
#include <asm/xen/pci.h>
#ifdef CONFIG_ACPI
-static int xen_hvm_register_pirq(u32 gsi, int triggering)
+static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
+ int trigger, int polarity)
{
int rc, irq;
struct physdev_map_pirq map_irq;
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return -1;
}
- if (triggering == ACPI_EDGE_SENSITIVE) {
+ if (trigger == ACPI_EDGE_SENSITIVE) {
shareable = 0;
name = "ioapic-edge";
} else {
@@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering)
return irq;
}
-
-static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi,
- int trigger, int polarity)
-{
- return xen_hvm_register_pirq(gsi, trigger);
-}
#endif
#if defined(CONFIG_PCI_MSI)
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq,
static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, pirq, ret = 0;
+ int irq, pirq;
struct msi_desc *msidesc;
struct msi_msg msg;
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
__read_msi_msg(msidesc, &msg);
pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) |
((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff);
- if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) {
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ);
- if (irq < 0)
+ if (msg.data != XEN_PIRQ_MSI_DATA ||
+ xen_irq_from_pirq(pirq) < 0) {
+ pirq = xen_allocate_pirq_msi(dev, msidesc);
+ if (pirq < 0)
goto error;
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d"
- " pirq=%d\n", irq, pirq);
- return 0;
+ xen_msi_compose_msg(dev, pirq, &msg);
+ __write_msi_msg(msidesc, &msg);
+ dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
+ } else {
+ dev_dbg(&dev->dev,
+ "xen: msi already bound to pirq=%d\n", pirq);
}
- xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ?
- "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ));
- if (irq < 0 || pirq < 0)
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (irq < 0)
goto error;
- printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq);
- xen_msi_compose_msg(dev, pirq, &msg);
- ret = set_irq_msi(irq, msidesc);
- if (ret < 0)
- goto error_while;
- write_msi_msg(irq, &msg);
+ dev_dbg(&dev->dev,
+ "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq);
}
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
-
- return ret;
+ dev_err(&dev->dev,
+ "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ return -ENODEV;
}
/*
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return -ENOMEM;
if (type == PCI_CAP_ID_MSIX)
- ret = xen_pci_frontend_enable_msix(dev, &v, nvec);
+ ret = xen_pci_frontend_enable_msix(dev, v, nvec);
else
- ret = xen_pci_frontend_enable_msi(dev, &v);
+ ret = xen_pci_frontend_enable_msi(dev, v);
if (ret)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_allocate_pirq(v[i], 0, /* not sharable */
- (type == PCI_CAP_ID_MSIX) ?
- "pcifront-msi-x" : "pcifront-msi");
- if (irq < 0) {
- ret = -1;
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ (type == PCI_CAP_ID_MSIX) ?
+ "pcifront-msi-x" :
+ "pcifront-msi");
+ if (irq < 0)
goto free;
- }
-
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error_while;
i++;
}
kfree(v);
return 0;
-error_while:
- unbind_from_irqhandler(irq, NULL);
error:
- if (ret == -ENODEV)
- dev_err(&dev->dev, "Xen PCI frontend has not registered" \
- " MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
free:
kfree(v);
return ret;
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev)
xen_pci_frontend_disable_msix(dev);
else
xen_pci_frontend_disable_msi(dev);
+
+ /* Free the IRQs and the msidesc using the generic code. */
+ default_teardown_msi_irqs(dev);
}
static void xen_teardown_msi_irq(unsigned int irq)
@@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq)
xen_destroy_irq(irq);
}
+#ifdef CONFIG_XEN_DOM0
static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int irq, ret;
+ int ret = 0;
struct msi_desc *msidesc;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_create_msi_irq(dev, msidesc, type);
- if (irq < 0)
- return -1;
+ struct physdev_map_pirq map_irq;
- ret = set_irq_msi(irq, msidesc);
- if (ret)
- goto error;
- }
- return 0;
+ memset(&map_irq, 0, sizeof(map_irq));
+ map_irq.domid = DOMID_SELF;
+ map_irq.type = MAP_PIRQ_TYPE_MSI;
+ map_irq.index = -1;
+ map_irq.pirq = -1;
+ map_irq.bus = dev->bus->number;
+ map_irq.devfn = dev->devfn;
-error:
- xen_destroy_irq(irq);
+ if (type == PCI_CAP_ID_MSIX) {
+ int pos;
+ u32 table_offset, bir;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+
+ pci_read_config_dword(dev, pos + PCI_MSIX_TABLE,
+ &table_offset);
+ bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
+
+ map_irq.table_base = pci_resource_start(dev, bir);
+ map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
+ }
+
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+ if (ret) {
+ dev_warn(&dev->dev, "xen map irq failed %d\n", ret);
+ goto out;
+ }
+
+ ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
+ map_irq.pirq, map_irq.index,
+ (type == PCI_CAP_ID_MSIX) ?
+ "msi-x" : "msi");
+ if (ret < 0)
+ goto out;
+ }
+ ret = 0;
+out:
return ret;
}
#endif
+#endif
static int xen_pcifront_enable_irq(struct pci_dev *dev)
{
int rc;
int share = 1;
+ u8 gsi;
- dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq);
-
- if (dev->irq < 0)
- return -EINVAL;
+ rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi);
+ if (rc < 0) {
+ dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n",
+ rc);
+ return rc;
+ }
- if (dev->irq < NR_IRQS_LEGACY)
+ if (gsi < NR_IRQS_LEGACY)
share = 0;
- rc = xen_allocate_pirq(dev->irq, share, "pcifront");
+ rc = xen_allocate_pirq(gsi, share, "pcifront");
if (rc < 0) {
- dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n",
- dev->irq, rc);
+ dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n",
+ gsi, rc);
return rc;
}
+
+ dev->irq = rc;
+ dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq);
return 0;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index d2c0d51a7178..28071bb31db7 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -15,21 +15,20 @@
#include <linux/serial_reg.h>
#include <linux/serial_8250.h>
+#include <asm/ce4100.h>
+#include <asm/prom.h>
#include <asm/setup.h>
+#include <asm/i8259.h>
#include <asm/io.h>
+#include <asm/io_apic.h>
static int ce4100_i8042_detect(void)
{
return 0;
}
-static void __init sdv_find_smp_config(void)
-{
-}
-
#ifdef CONFIG_SERIAL_8250
-
static unsigned int mem_serial_in(struct uart_port *p, int offset)
{
offset = offset << p->regshift;
@@ -118,6 +117,15 @@ static void __init sdv_arch_setup(void)
sdv_serial_fixup();
}
+#ifdef CONFIG_X86_IO_APIC
+static void __cpuinit sdv_pci_init(void)
+{
+ x86_of_pci_init();
+ /* We can't set this earlier, because we need to calibrate the timer */
+ legacy_pic = &null_legacy_pic;
+}
+#endif
+
/*
* CE4100 specific x86_init function overrides and early setup
* calls.
@@ -128,5 +136,11 @@ void __init x86_ce4100_early_setup(void)
x86_platform.i8042_detect = ce4100_i8042_detect;
x86_init.resources.probe_roms = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
- x86_init.mpparse.find_smp_config = sdv_find_smp_config;
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.pci.init = ce4100_pci_init;
+
+#ifdef CONFIG_X86_IO_APIC
+ x86_init.pci.init_irq = sdv_pci_init;
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck;
+#endif
}
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
new file mode 100644
index 000000000000..dc701ea58546
--- /dev/null
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -0,0 +1,428 @@
+/*
+ * CE4100 on Falcon Falls
+ *
+ * (c) Copyright 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2 of the License.
+ */
+/dts-v1/;
+/ {
+ model = "intel,falconfalls";
+ compatible = "intel,falconfalls";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+ };
+
+ soc@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,ce4100-cp";
+ ranges;
+
+ ioapic1: interrupt-controller@fec00000 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0xfec00000 0x1000>;
+ };
+
+ timer@fed00000 {
+ compatible = "intel,ce4100-hpet";
+ reg = <0xfed00000 0x200>;
+ };
+
+ lapic0: interrupt-controller@fee00000 {
+ compatible = "intel,ce4100-lapic";
+ reg = <0xfee00000 0x1000>;
+ };
+
+ pci@3fc {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <0 0>;
+ ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000
+ 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000
+ 0x0000000 0 0x0 0x0 0 0x100>;
+
+ /* Secondary IO-APIC */
+ ioapic2: interrupt-controller@0,1 {
+ #interrupt-cells = <2>;
+ compatible = "intel,ce4100-ioapic";
+ interrupt-controller;
+ reg = <0x100 0x0 0x0 0x0 0x0>;
+ assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>;
+ };
+
+ pci@1,0 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ compatible = "intel,ce4100-pci", "pci";
+ device_type = "pci";
+ bus-range = <1 1>;
+ ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>;
+
+ interrupt-parent = <&ioapic2>;
+
+ display@2,0 {
+ compatible = "pci8086,2e5b.2",
+ "pci8086,2e5b",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x11000 0x0 0x0 0x0 0x0>;
+ interrupts = <0 1>;
+ };
+
+ multimedia@3,0 {
+ compatible = "pci8086,2e5c.2",
+ "pci8086,2e5c",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x11800 0x0 0x0 0x0 0x0>;
+ interrupts = <2 1>;
+ };
+
+ multimedia@4,0 {
+ compatible = "pci8086,2e5d.2",
+ "pci8086,2e5d",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12000 0x0 0x0 0x0 0x0>;
+ interrupts = <4 1>;
+ };
+
+ multimedia@4,1 {
+ compatible = "pci8086,2e5e.2",
+ "pci8086,2e5e",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x12100 0x0 0x0 0x0 0x0>;
+ interrupts = <5 1>;
+ };
+
+ sound@6,0 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13000 0x0 0x0 0x0 0x0>;
+ interrupts = <6 1>;
+ };
+
+ sound@6,1 {
+ compatible = "pci8086,2e5f.2",
+ "pci8086,2e5f",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13100 0x0 0x0 0x0 0x0>;
+ interrupts = <7 1>;
+ };
+
+ sound@6,2 {
+ compatible = "pci8086,2e60.2",
+ "pci8086,2e60",
+ "pciclass040100",
+ "pciclass0401";
+
+ reg = <0x13200 0x0 0x0 0x0 0x0>;
+ interrupts = <8 1>;
+ };
+
+ display@8,0 {
+ compatible = "pci8086,2e61.2",
+ "pci8086,2e61",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14000 0x0 0x0 0x0 0x0>;
+ interrupts = <9 1>;
+ };
+
+ display@8,1 {
+ compatible = "pci8086,2e62.2",
+ "pci8086,2e62",
+ "pciclass038000",
+ "pciclass0380";
+
+ reg = <0x14100 0x0 0x0 0x0 0x0>;
+ interrupts = <10 1>;
+ };
+
+ multimedia@8,2 {
+ compatible = "pci8086,2e63.2",
+ "pci8086,2e63",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x14200 0x0 0x0 0x0 0x0>;
+ interrupts = <11 1>;
+ };
+
+ entertainment-encryption@9,0 {
+ compatible = "pci8086,2e64.2",
+ "pci8086,2e64",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x14800 0x0 0x0 0x0 0x0>;
+ interrupts = <12 1>;
+ };
+
+ localbus@a,0 {
+ compatible = "pci8086,2e65.2",
+ "pci8086,2e65",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15000 0x0 0x0 0x0 0x0>;
+ };
+
+ serial@b,0 {
+ compatible = "pci8086,2e66.2",
+ "pci8086,2e66",
+ "pciclass070003",
+ "pciclass0700";
+
+ reg = <0x15800 0x0 0x0 0x0 0x0>;
+ interrupts = <14 1>;
+ };
+
+ gpio@b,1 {
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ #gpio-cells = <2>;
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ gpio-controller;
+ };
+
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <0 0 0x100>;
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
+
+ smard-card@b,3 {
+ compatible = "pci8086,2e69.2",
+ "pci8086,2e69",
+ "pciclass070500",
+ "pciclass0705";
+
+ reg = <0x15b00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+ };
+
+ spi-controller@b,4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible =
+ "pci8086,2e6a.2",
+ "pci8086,2e6a",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15c00 0x0 0x0 0x0 0x0>;
+ interrupts = <15 1>;
+
+ dac@0 {
+ compatible = "ti,pcm1755";
+ reg = <0>;
+ spi-max-frequency = <115200>;
+ };
+
+ dac@1 {
+ compatible = "ti,pcm1609a";
+ reg = <1>;
+ spi-max-frequency = <115200>;
+ };
+
+ eeprom@2 {
+ compatible = "atmel,at93c46";
+ reg = <2>;
+ spi-max-frequency = <115200>;
+ };
+ };
+
+ multimedia@b,7 {
+ compatible = "pci8086,2e6d.2",
+ "pci8086,2e6d",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15f00 0x0 0x0 0x0 0x0>;
+ };
+
+ ethernet@c,0 {
+ compatible = "pci8086,2e6e.2",
+ "pci8086,2e6e",
+ "pciclass020000",
+ "pciclass0200";
+
+ reg = <0x16000 0x0 0x0 0x0 0x0>;
+ interrupts = <21 1>;
+ };
+
+ clock@c,1 {
+ compatible = "pci8086,2e6f.2",
+ "pci8086,2e6f",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x16100 0x0 0x0 0x0 0x0>;
+ interrupts = <3 1>;
+ };
+
+ usb@d,0 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16800 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ usb@d,1 {
+ compatible = "pci8086,2e70.2",
+ "pci8086,2e70",
+ "pciclass0c0320",
+ "pciclass0c03";
+
+ reg = <0x16900 0x0 0x0 0x0 0x0>;
+ interrupts = <22 3>;
+ };
+
+ sata@e,0 {
+ compatible = "pci8086,2e71.0",
+ "pci8086,2e71",
+ "pciclass010601",
+ "pciclass0106";
+
+ reg = <0x17000 0x0 0x0 0x0 0x0>;
+ interrupts = <23 3>;
+ };
+
+ flash@f,0 {
+ compatible = "pci8086,701.1",
+ "pci8086,701",
+ "pciclass050100",
+ "pciclass0501";
+
+ reg = <0x17800 0x0 0x0 0x0 0x0>;
+ interrupts = <13 1>;
+ };
+
+ entertainment-encryption@10,0 {
+ compatible = "pci8086,702.1",
+ "pci8086,702",
+ "pciclass101000",
+ "pciclass1010";
+
+ reg = <0x18000 0x0 0x0 0x0 0x0>;
+ };
+
+ co-processor@11,0 {
+ compatible = "pci8086,703.1",
+ "pci8086,703",
+ "pciclass0b4000",
+ "pciclass0b40";
+
+ reg = <0x18800 0x0 0x0 0x0 0x0>;
+ interrupts = <1 1>;
+ };
+
+ multimedia@12,0 {
+ compatible = "pci8086,704.0",
+ "pci8086,704",
+ "pciclass048000",
+ "pciclass0480";
+
+ reg = <0x19000 0x0 0x0 0x0 0x0>;
+ };
+ };
+
+ isa@1f,0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "isa";
+ ranges = <1 0 0 0 0 0x100>;
+
+ rtc@70 {
+ compatible = "intel,ce4100-rtc", "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
+ };
+ };
+ };
+};
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ea6529e93c6f..5c0207bf959b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
#include <asm/io.h>
#include <asm/i8259.h>
#include <asm/intel_scu_ipc.h>
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void)
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
x86_platform.i8042_detect = mrst_i8042_detect;
+ x86_init.timers.wallclock_init = mrst_rtc_init;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 32cd7edd71a0..04cf645feb92 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime)
void __init mrst_rtc_init(void)
{
- unsigned long rtc_paddr;
- void __iomem *virt_base;
+ unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr;
sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
- if (!sfi_mrtc_num)
+ if (!sfi_mrtc_num || !vrtc_paddr)
return;
- rtc_paddr = sfi_mrtc_array[0].phys_addr;
-
- /* vRTC's register address may not be page aligned */
- set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
-
- virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
- virt_base += rtc_paddr & ~PAGE_MASK;
- vrtc_virt_base = virt_base;
-
+ vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
+ vrtc_paddr);
x86_platform.get_wallclock = vrtc_get_time;
x86_platform.set_wallclock = vrtc_set_mmss;
}
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile
index e797428b163b..c2a8cab65e5d 100644
--- a/arch/x86/platform/olpc/Makefile
+++ b/arch/x86/platform/olpc/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_OLPC) += olpc.o
obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o
-obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o
+obj-$(CONFIG_OLPC) += olpc_ofw.o
+obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index dab874647530..044bda5b3174 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -140,8 +140,7 @@ void * __init prom_early_alloc(unsigned long size)
* wasted bootmem) and hand off chunks of it to callers.
*/
res = alloc_bootmem(chunk_size);
- if (!res)
- return NULL;
+ BUG_ON(!res);
prom_early_allocated += chunk_size;
memset(res, 0, chunk_size);
free_mem = chunk_size;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index df58e9cad96a..a7b38d35c29a 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1364,11 +1364,11 @@ uv_activation_descriptor_init(int node, int pnode)
memset(bd2, 0, sizeof(struct bau_desc));
bd2->header.sw_ack_flag = 1;
/*
- * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub
+ * base_dest_nodeid is the nasid of the first uvhub
* in the partition. The bit map will indicate uvhub numbers,
* which are 0-N in a partition. Pnodes are unique system-wide.
*/
- bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
+ bd2->header.base_dest_nodeid = UV_PNODE_TO_NASID(uv_partition_base_pnode);
bd2->header.dest_subnodeid = 0x10; /* the LB */
bd2->header.command = UV_NET_ENDPOINT_INTD;
bd2->header.int_both = 1;
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index 7b24460917d5..374a05d8ad22 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
unsigned long mmr_offset, int limit)
{
const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg = get_irq_chip_data(irq);
+ struct irq_cfg *cfg = irq_get_chip_data(irq);
unsigned long mmr_value;
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode, err;
@@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
else
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+ irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
irq_name);
mmr_value = 0;
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c
index 632037671746..fe4cf8294878 100644
--- a/arch/x86/platform/visws/visws_quirks.c
+++ b/arch/x86/platform/visws/visws_quirks.c
@@ -569,11 +569,13 @@ out_unlock:
static struct irqaction master_action = {
.handler = piix4_master_intr,
.name = "PIIX4-8259",
+ .flags = IRQF_NO_THREAD,
};
static struct irqaction cascade_action = {
.handler = no_action,
.name = "cascade",
+ .flags = IRQF_NO_THREAD,
};
static inline void set_piix4_virtual_irq_type(void)
@@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void)
chip = &cobalt_irq_type;
if (chip)
- set_irq_chip(i, chip);
+ irq_set_chip(i, chip);
}
setup_irq(CO_IRQ_8259, &master_action);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5b54892e4bc3..1c7121ba18ff 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -38,7 +38,7 @@ config XEN_MAX_DOMAIN_MEMORY
config XEN_SAVE_RESTORE
bool
- depends on XEN && PM
+ depends on XEN
default y
config XEN_DEBUG_FS
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.
+
+config XEN_DEBUG
+ bool "Enable Xen debug checks"
+ depends on XEN
+ default n
+ help
+ Enable various WARN_ON checks in the Xen MMU code.
+ Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45fb..49dbd78ec3cb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor)
xen_setup_features();
- pv_info = xen_info;
- pv_info.kernel_rpl = 0;
+ pv_info.name = "Xen HVM";
xen_domain_type = XEN_HVM_DOMAIN;
return 0;
}
-void xen_hvm_init_shared_info(void)
+void __ref xen_hvm_init_shared_info(void)
{
int cpu;
struct xen_add_to_physmap xatp;
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
+ if (xen_have_vector_callback)
+ xen_init_lock_cpu(cpu);
break;
default:
break;
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void)
if (xen_feature(XENFEAT_hvm_callback_vector))
xen_have_vector_callback = 1;
+ xen_hvm_smp_init();
register_cpu_notifier(&xen_hvm_cpu_notifier);
xen_unplug_emulated_devices();
have_vcpu_info_placement = 0;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad574..3f6f3347aa17 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -46,6 +46,7 @@
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/seq_file.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (val & _PAGE_PRESENT) {
unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
pteval_t flags = val & PTE_FLAGS_MASK;
- unsigned long mfn = pfn_to_mfn(pfn);
+ unsigned long mfn;
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ mfn = get_phys_to_machine(pfn);
+ else
+ mfn = pfn;
/*
* If there's no mfn for the pfn, then just create an
* empty non-present pte. Unfortunately this loses
@@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val)
if (unlikely(mfn == INVALID_P2M_ENTRY)) {
mfn = 0;
flags = 0;
+ } else {
+ /*
+ * Paramount to do this test _after_ the
+ * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY &
+ * IDENTITY_FRAME_BIT resolves to true.
+ */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ if (mfn & IDENTITY_FRAME_BIT) {
+ mfn &= ~IDENTITY_FRAME_BIT;
+ flags |= _PAGE_IOMAP;
+ }
}
-
val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
}
@@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte)
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
+#ifdef CONFIG_XEN_DEBUG
+pte_t xen_make_pte_debug(pteval_t pte)
+{
+ phys_addr_t addr = (pte & PTE_PFN_MASK);
+ phys_addr_t other_addr;
+ bool io_page = false;
+ pte_t _pte;
+
+ if (pte & _PAGE_IOMAP)
+ io_page = true;
+
+ _pte = xen_make_pte(pte);
+
+ if (!addr)
+ return _pte;
+
+ if (io_page &&
+ (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
+ other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
+ WARN(addr != other_addr,
+ "0x%lx is using VM_IO, but it is 0x%lx!\n",
+ (unsigned long)addr, (unsigned long)other_addr);
+ } else {
+ pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
+ other_addr = (_pte.pte & PTE_PFN_MASK);
+ WARN((addr == other_addr) && (!io_page) && (!iomap_set),
+ "0x%lx is missing VM_IO (and wasn't fixed)!\n",
+ (unsigned long)addr);
+ }
+
+ return _pte;
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
+#endif
+
pgd_t xen_make_pgd(pgdval_t pgd)
{
pgd = pte_pfn_to_mfn(pgd);
@@ -986,10 +1036,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
*/
void xen_mm_pin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
@@ -998,7 +1047,7 @@ void xen_mm_pin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
/*
@@ -1099,10 +1148,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
*/
void xen_mm_unpin_all(void)
{
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&pgd_lock, flags);
+ spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
@@ -1112,7 +1160,7 @@ void xen_mm_unpin_all(void)
}
}
- spin_unlock_irqrestore(&pgd_lock, flags);
+ spin_unlock(&pgd_lock);
}
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
@@ -1443,7 +1491,7 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
* early_ioremap fixmap slot, make sure it is RO.
*/
if (!is_early_ioremap_ptep(ptep) &&
- pfn >= e820_table_start && pfn < e820_table_end)
+ pfn >= pgt_buf_start && pfn < pgt_buf_end)
pte = pte_wrprotect(pte);
return pte;
@@ -1942,6 +1990,9 @@ __init void xen_ident_map_ISA(void)
static __init void xen_post_allocator_init(void)
{
+#ifdef CONFIG_XEN_DEBUG
+ pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
+#endif
pv_mmu_ops.set_pte = xen_set_pte;
pv_mmu_ops.set_pmd = xen_set_pmd;
pv_mmu_ops.set_pud = xen_set_pud;
@@ -2074,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
in_frames[i] = virt_to_mfn(vaddr);
MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
- set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+ __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
if (out_frames)
out_frames[i] = virt_to_pfn(vaddr);
@@ -2353,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
#ifdef CONFIG_XEN_DEBUG_FS
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+ .open = p2m_dump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
@@ -2408,6 +2471,7 @@ static int __init xen_mmu_debugfs(void)
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
+ debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
return 0;
}
fs_initcall(xen_mmu_debugfs);
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index fd12d7ce7ff9..215a3ce61068 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -23,6 +23,129 @@
* P2M_PER_PAGE depends on the architecture, as a mfn is always
* unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
* 512 and 1024 entries respectively.
+ *
+ * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
+ *
+ * However not all entries are filled with MFNs. Specifically for all other
+ * leaf entries, or for the top root, or middle one, for which there is a void
+ * entry, we assume it is "missing". So (for example)
+ * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY.
+ *
+ * We also have the possibility of setting 1-1 mappings on certain regions, so
+ * that:
+ * pfn_to_mfn(0xc0000)=0xc0000
+ *
+ * The benefit of this is, that we can assume for non-RAM regions (think
+ * PCI BARs, or ACPI spaces), we can create mappings easily b/c we
+ * get the PFN value to match the MFN.
+ *
+ * For this to work efficiently we have one new page p2m_identity and
+ * allocate (via reserved_brk) any other pages we need to cover the sides
+ * (1GB or 4MB boundary violations). All entries in p2m_identity are set to
+ * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs,
+ * no other fancy value).
+ *
+ * On lookup we spot that the entry points to p2m_identity and return the
+ * identity value instead of dereferencing and returning INVALID_P2M_ENTRY.
+ * If the entry points to an allocated page, we just proceed as before and
+ * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in
+ * appropriate functions (pfn_to_mfn).
+ *
+ * The reason for having the IDENTITY_FRAME_BIT instead of just returning the
+ * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a
+ * non-identity pfn. To protect ourselves against that, we elect to set (and get) the
+ * IDENTITY_FRAME_BIT on all identity mapped PFNs.
+ *
+ * This simplistic diagram is used to explain the more subtle piece of code.
+ * There is also a diagram of the P2M at the end that can help.
+ * Imagine your E820 looking as so:
+ *
+ * 1GB 2GB
+ * /-------------------+---------\/----\ /----------\ /---+-----\
+ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM |
+ * \-------------------+---------/\----/ \----------/ \---+-----/
+ * ^- 1029MB ^- 2001MB
+ *
+ * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100),
+ * 2048MB = 524288 (0x80000)]
+ *
+ * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB
+ * is actually not present (would have to kick the balloon driver to put it in).
+ *
+ * When we are told to set the PFNs for identity mapping (see patch: "xen/setup:
+ * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start
+ * of the PFN and the end PFN (263424 and 512256 respectively). The first step
+ * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page
+ * covers 512^2 of page estate (1GB) and in case the start or end PFN is not
+ * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn
+ * to end pfn. We reserve_brk top leaf pages if they are missing (means they
+ * point to p2m_mid_missing).
+ *
+ * With the E820 example above, 263424 is not 1GB aligned so we allocate a
+ * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000.
+ * Each entry in the allocated page is "missing" (points to p2m_missing).
+ *
+ * Next stage is to determine if we need to do a more granular boundary check
+ * on the 4MB (or 2MB depending on architecture) off the start and end pfn's.
+ * We check if the start pfn and end pfn violate that boundary check, and if
+ * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer
+ * granularity of setting which PFNs are missing and which ones are identity.
+ * In our example 263424 and 512256 both fail the check so we reserve_brk two
+ * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing"
+ * values) and assign them to p2m[1][2] and p2m[1][488] respectively.
+ *
+ * At this point we would at minimum reserve_brk one page, but could be up to
+ * three. Each call to set_phys_range_identity has at maximum a three page
+ * cost. If we were to query the P2M at this stage, all those entries from
+ * start PFN through end PFN (so 1029MB -> 2001MB) would return
+ * INVALID_P2M_ENTRY ("missing").
+ *
+ * The next step is to walk from the start pfn to the end pfn setting
+ * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity.
+ * If we find that the middle leaf is pointing to p2m_missing we can swap it
+ * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this
+ * point we do not need to worry about boundary alignment (so no need to
+ * reserve_brk a middle page, figure out which PFNs are "missing" and which
+ * ones are identity), as that has been done earlier. If we find that the
+ * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference
+ * that page (which covers 512 PFNs) and set the appropriate PFN with
+ * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we
+ * set from p2m[1][2][256->511] and p2m[1][488][0->256] with
+ * IDENTITY_FRAME_BIT set.
+ *
+ * All other regions that are void (or not filled) either point to p2m_missing
+ * (considered missing) or have the default value of INVALID_P2M_ENTRY (also
+ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511]
+ * contain the INVALID_P2M_ENTRY value and are considered "missing."
+ *
+ * This is what the p2m ends up looking like (for the E820 above) with this
+ * fabulous drawing:
+ *
+ * p2m /--------------\
+ * /-----\ | &mfn_list[0],| /-----------------\
+ * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. |
+ * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] |
+ * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] |
+ * |-----| \ | [p2m_identity]+\\ | .... |
+ * | 2 |--\ \-------------------->| ... | \\ \----------------/
+ * |-----| \ \---------------/ \\
+ * | 3 |\ \ \\ p2m_identity
+ * |-----| \ \-------------------->/---------------\ /-----------------\
+ * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... |
+ * \-----/ / | [p2m_identity]+-->| ..., ~0 |
+ * / /---------------\ | .... | \-----------------/
+ * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. |
+ * / | IDENTITY[@256]|<----/ \---------------/
+ * / | ~0, ~0, .... |
+ * | \---------------/
+ * |
+ * p2m_missing p2m_missing
+ * /------------------\ /------------\
+ * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
+ * | [p2m_mid_missing]+---->| ..., ~0 |
+ * \------------------/ \------------/
+ *
+ * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_FRAME_BIT)
*/
#include <linux/init.h>
@@ -30,6 +153,7 @@
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/sched.h>
+#include <linux/seq_file.h>
#include <asm/cache.h>
#include <asm/setup.h>
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE);
static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE);
+static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE);
+
RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE)));
+/* We might hit two boundary violations at the start and end, at max each
+ * boundary violation will require three middle nodes. */
+RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3);
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m)
* - After resume we're called from within stop_machine, but the mfn
* tree should alreay be completely allocated.
*/
-void xen_build_mfn_list_list(void)
+void __ref xen_build_mfn_list_list(void)
{
unsigned long pfn;
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE);
p2m_top_init(p2m_top);
+ p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE);
+ p2m_init(p2m_identity);
+
/*
* The domain builder gives us a pre-constructed p2m array in
* mfn_list for all the pages initially given to us, so we just
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /*
+ * The INVALID_P2M_ENTRY is filled in both p2m_*identity
+ * and in p2m_*missing, so returning the INVALID_P2M_ENTRY
+ * would be wrong.
+ */
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return IDENTITY_FRAME(pfn);
+
return p2m_top[topidx][mididx][idx];
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn)
p2m_top_mfn_p[topidx] = mid_mfn;
}
- if (p2m_top[topidx][mididx] == p2m_missing) {
+ if (p2m_top[topidx][mididx] == p2m_identity ||
+ p2m_top[topidx][mididx] == p2m_missing) {
/* p2m leaf page is missing */
unsigned long *p2m;
+ unsigned long *p2m_orig = p2m_top[topidx][mididx];
p2m = alloc_p2m_page();
if (!p2m)
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m_init(p2m);
- if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing)
+ if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig)
free_p2m_page(p2m);
else
mid_mfn[mididx] = virt_to_mfn(p2m);
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn)
return true;
}
+bool __early_alloc_p2m(unsigned long pfn)
+{
+ unsigned topidx, mididx, idx;
+
+ topidx = p2m_top_index(pfn);
+ mididx = p2m_mid_index(pfn);
+ idx = p2m_index(pfn);
+
+ /* Pfff.. No boundary cross-over, lets get out. */
+ if (!idx)
+ return false;
+
+ WARN(p2m_top[topidx][mididx] == p2m_identity,
+ "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n",
+ topidx, mididx);
+
+ /*
+ * Could be done by xen_build_dynamic_phys_to_machine..
+ */
+ if (p2m_top[topidx][mididx] != p2m_missing)
+ return false;
+
+ /* Boundary cross-over for the edges: */
+ if (idx) {
+ unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_init(p2m);
+
+ p2m_top[topidx][mididx] = p2m;
+
+ }
+ return idx != 0;
+}
+unsigned long set_phys_range_identity(unsigned long pfn_s,
+ unsigned long pfn_e)
+{
+ unsigned long pfn;
+
+ if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN))
+ return 0;
+
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap)))
+ return pfn_e - pfn_s;
+
+ if (pfn_s > pfn_e)
+ return 0;
+
+ for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1));
+ pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE));
+ pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE)
+ {
+ unsigned topidx = p2m_top_index(pfn);
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE);
+
+ p2m_mid_init(mid);
+
+ p2m_top[topidx] = mid;
+ }
+ }
+
+ __early_alloc_p2m(pfn_s);
+ __early_alloc_p2m(pfn_e);
+
+ for (pfn = pfn_s; pfn < pfn_e; pfn++)
+ if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn)))
+ break;
+
+ if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s),
+ "Identity mapping failed. We are %ld short of 1-1 mappings!\n",
+ (pfn_e - pfn_s) - (pfn - pfn_s)))
+ printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn);
+
+ return pfn - pfn_s;
+}
+
/* Try to install p2m mapping; fail if intermediate bits missing */
bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return true;
+ }
if (unlikely(pfn >= MAX_P2M_PFN)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
return true;
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
mididx = p2m_mid_index(pfn);
idx = p2m_index(pfn);
+ /* For sparse holes where the p2m leaf has real PFN along with
+ * PCI holes, stick in the PFN as the MFN value.
+ */
+ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) {
+ if (p2m_top[topidx][mididx] == p2m_identity)
+ return true;
+
+ /* Swap over from MISSING to IDENTITY if needed. */
+ if (p2m_top[topidx][mididx] == p2m_missing) {
+ WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing,
+ p2m_identity) != p2m_missing);
+ return true;
+ }
+ }
+
if (p2m_top[topidx][mididx] == p2m_missing)
return mfn == INVALID_P2M_ENTRY;
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return true;
- }
-
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
if (!alloc_p2m(pfn))
return false;
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page)
{
unsigned long flags;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page)
unsigned long flags;
unsigned long mfn;
unsigned long pfn;
- unsigned long address;
+ unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn)
return ret;
}
EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
+
+#ifdef CONFIG_XEN_DEBUG_FS
+
+int p2m_dump_show(struct seq_file *m, void *v)
+{
+ static const char * const level_name[] = { "top", "middle",
+ "entry", "abnormal" };
+ static const char * const type_name[] = { "identity", "missing",
+ "pfn", "abnormal"};
+#define TYPE_IDENTITY 0
+#define TYPE_MISSING 1
+#define TYPE_PFN 2
+#define TYPE_UNKNOWN 3
+ unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
+ unsigned int uninitialized_var(prev_level);
+ unsigned int uninitialized_var(prev_type);
+
+ if (!p2m_top)
+ return 0;
+
+ for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) {
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned mididx = p2m_mid_index(pfn);
+ unsigned idx = p2m_index(pfn);
+ unsigned lvl, type;
+
+ lvl = 4;
+ type = TYPE_UNKNOWN;
+ if (p2m_top[topidx] == p2m_mid_missing) {
+ lvl = 0; type = TYPE_MISSING;
+ } else if (p2m_top[topidx] == NULL) {
+ lvl = 0; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == NULL) {
+ lvl = 1; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx] == p2m_identity) {
+ lvl = 1; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx] == p2m_missing) {
+ lvl = 1; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == 0) {
+ lvl = 2; type = TYPE_UNKNOWN;
+ } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) {
+ lvl = 2; type = TYPE_IDENTITY;
+ } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) {
+ lvl = 2; type = TYPE_MISSING;
+ } else if (p2m_top[topidx][mididx][idx] == pfn) {
+ lvl = 2; type = TYPE_PFN;
+ } else if (p2m_top[topidx][mididx][idx] != pfn) {
+ lvl = 2; type = TYPE_PFN;
+ }
+ if (pfn == 0) {
+ prev_level = lvl;
+ prev_type = type;
+ }
+ if (pfn == MAX_DOMAIN_PAGES-1) {
+ lvl = 3;
+ type = TYPE_UNKNOWN;
+ }
+ if (prev_type != type) {
+ seq_printf(m, " [0x%lx->0x%lx] %s\n",
+ prev_pfn_type, pfn, type_name[prev_type]);
+ prev_pfn_type = pfn;
+ prev_type = type;
+ }
+ if (prev_level != lvl) {
+ seq_printf(m, " [0x%lx->0x%lx] level %s\n",
+ prev_pfn_level, pfn, level_name[prev_level]);
+ prev_pfn_level = pfn;
+ prev_level = lvl;
+ }
+ }
+ return 0;
+#undef TYPE_IDENTITY
+#undef TYPE_MISSING
+#undef TYPE_PFN
+#undef TYPE_UNKNOWN
+}
+#endif
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a8a66a50d446..fa0269a99377 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
static __init void xen_add_extra_mem(unsigned long pages)
{
+ unsigned long pfn;
+
u64 size = (u64)pages * PAGE_SIZE;
u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages)
xen_extra_mem_size += size;
xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
+
+ for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
start, end, ret);
if (ret == 1) {
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
return released;
}
+static unsigned long __init xen_set_identity(const struct e820entry *list,
+ ssize_t map_size)
+{
+ phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
+ phys_addr_t start_pci = last;
+ const struct e820entry *entry;
+ unsigned long identity = 0;
+ int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ phys_addr_t start = entry->addr;
+ phys_addr_t end = start + entry->size;
+
+ if (start < last)
+ start = last;
+
+ if (end <= start)
+ continue;
+
+ /* Skip over the 1MB region. */
+ if (last > end)
+ continue;
+
+ if (entry->type == E820_RAM) {
+ if (start > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(start));
+
+ /* Without saving 'last' we would gobble RAM too
+ * at the end of the loop. */
+ last = end;
+ start_pci = end;
+ continue;
+ }
+ start_pci = min(start, start_pci);
+ last = end;
+ }
+ if (last > start_pci)
+ identity += set_phys_range_identity(
+ PFN_UP(start_pci), PFN_DOWN(last));
+ return identity;
+}
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
+ static struct e820entry map_raw[E820MAX] __initdata;
unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void)
struct xen_memory_map memmap;
unsigned long extra_pages = 0;
unsigned long extra_limit;
+ unsigned long identity_pages = 0;
int i;
int op;
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ memcpy(map_raw, map, sizeof(map));
e820.nr_map = 0;
xen_extra_mem_start = mem_end;
for (i = 0; i < memmap.nr_entries; i++) {
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void)
end -= delta;
extra_pages += PFN_DOWN(delta);
+ /*
+ * Set RAM below 4GB that is not for us to be unusable.
+ * This prevents "System RAM" address space from being
+ * used as potential resource for I/O address (happens
+ * when 'allocate_resource' is called).
+ */
+ if (delta &&
+ (xen_initial_domain() && end < 0x100000000ULL))
+ e820_add_region(end, delta, E820_UNUSABLE);
}
if (map[i].size > 0 && end > xen_extra_mem_start)
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(extra_pages);
+ /*
+ * Set P2M for all non-RAM pages and E820 gaps to be identity
+ * type PFNs. We supply it with the non-sanitized version
+ * of the E820.
+ */
+ identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 72a4c7959045..30612441ed99 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -509,3 +509,41 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ WARN_ON(xen_smp_intr_init(0));
+
+ if (!xen_have_vector_callback)
+ return;
+ xen_init_lock_cpu(0);
+ xen_init_spinlocks();
+}
+
+static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
+{
+ int rc;
+ rc = native_cpu_up(cpu);
+ WARN_ON (xen_smp_intr_init(cpu));
+ return rc;
+}
+
+static void xen_hvm_cpu_die(unsigned int cpu)
+{
+ unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
+ native_cpu_die(cpu);
+}
+
+void __init xen_hvm_smp_init(void)
+{
+ smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
+ smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
+ smp_ops.cpu_up = xen_hvm_cpu_up;
+ smp_ops.cpu_die = xen_hvm_cpu_die;
+ smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
+ smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
+}
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 9bbd63a129b5..45329c8c226e 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -12,7 +12,7 @@
#include "xen-ops.h"
#include "mmu.h"
-void xen_pre_suspend(void)
+void xen_arch_pre_suspend(void)
{
xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
xen_start_info->console.domU.mfn =
@@ -26,8 +26,9 @@ void xen_pre_suspend(void)
BUG();
}
-void xen_hvm_post_suspend(int suspend_cancelled)
+void xen_arch_hvm_post_suspend(int suspend_cancelled)
{
+#ifdef CONFIG_XEN_PVHVM
int cpu;
xen_hvm_init_shared_info();
xen_callback_vector();
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled)
xen_setup_runstate_info(cpu);
}
}
+#endif
}
-void xen_post_suspend(int suspend_cancelled)
+void xen_arch_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 067759e3d6a5..2e2d370a47b1 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu)
name = "<timer kasprintf failed>";
irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER,
+ IRQF_DISABLED|IRQF_PERCPU|
+ IRQF_NOBALANCING|IRQF_TIMER|
+ IRQF_FORCE_RESUME,
name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24e29c0..aaa7291c9259 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,9 @@ ENTRY(startup_xen)
__FINIT
.pushsection .text
- .align PAGE_SIZE_asm
+ .align PAGE_SIZE
ENTRY(hypercall_page)
- .skip PAGE_SIZE_asm
+ .skip PAGE_SIZE
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9d41bf985757..3112f55638c4 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void __init xen_hvm_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 19df764f6399..f3e5eb43f71c 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
update_process_times(user_mode(get_irq_regs()));
#endif
- write_seqlock(&xtime_lock);
-
- do_timer(1); /* Linux handler in kernel/timer.c */
+ xtime_update(1); /* Linux handler in kernel/time/timekeeping */
/* Note that writing CCOMPARE clears the interrupt. */
next += CCOUNT_PER_JIFFY;
set_linux_timer(next);
-
- write_sequnlock(&xtime_lock);
}
/* Allow platform to do something useful (Wdog). */
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 9b526154c9ba..a2820065927e 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -155,7 +155,7 @@ SECTIONS
INIT_RAM_FS
}
- PERCPU(PAGE_SIZE)
+ PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE)
/* We need this dummy segment here */
diff --git a/block/blk-core.c b/block/blk-core.c
index 2f4002f79a24..518dd423a5fe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -352,7 +352,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
}
EXPORT_SYMBOL(blk_start_queue);
@@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue);
/**
* __blk_run_queue - run a single device queue
* @q: The queue to run
+ * @force_kblockd: Don't run @q->request_fn directly. Use kblockd.
*
* Description:
* See @blk_run_queue. This variant must be called with the queue lock
* held and interrupts disabled.
*
*/
-void __blk_run_queue(struct request_queue *q)
+void __blk_run_queue(struct request_queue *q, bool force_kblockd)
{
blk_remove_plug(q);
@@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queue *q)
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
q->request_fn(q);
queue_flag_clear(QUEUE_FLAG_REENTER, q);
} else {
@@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue *q)
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_run_queue);
@@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
drive_stat_acct(rq, 1);
__elv_add_request(q, rq, where, 0);
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL(blk_insert_request);
@@ -2610,13 +2611,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
}
EXPORT_SYMBOL(kblockd_schedule_work);
-int kblockd_schedule_delayed_work(struct request_queue *q,
- struct delayed_work *dwork, unsigned long delay)
-{
- return queue_delayed_work(kblockd_workqueue, dwork, delay);
-}
-EXPORT_SYMBOL(kblockd_schedule_delayed_work);
-
int __init blk_dev_init(void)
{
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 54b123d6563e..b27d0208611b 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -66,10 +66,12 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q,
/*
* Moving a request silently to empty queue_head may stall the
- * queue. Kick the queue in those cases.
+ * queue. Kick the queue in those cases. This function is called
+ * from request completion path and calling directly into
+ * request_fn may confuse the driver. Always use kblockd.
*/
if (was_empty && next_rq)
- __blk_run_queue(q);
+ __blk_run_queue(q, true);
}
static void pre_flush_end_io(struct request *rq, int error)
@@ -130,7 +132,7 @@ static struct request *queue_next_fseq(struct request_queue *q)
BUG();
}
- elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+ elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE);
return rq;
}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 1a320d2406b0..bd3e8df4d5e2 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -109,7 +109,6 @@ struct bio_batch
atomic_t done;
unsigned long flags;
struct completion *wait;
- bio_end_io_t *end_io;
};
static void bio_batch_end_io(struct bio *bio, int err)
@@ -122,17 +121,14 @@ static void bio_batch_end_io(struct bio *bio, int err)
else
clear_bit(BIO_UPTODATE, &bb->flags);
}
- if (bb) {
- if (bb->end_io)
- bb->end_io(bio, err);
- atomic_inc(&bb->done);
- complete(bb->wait);
- }
+ if (bb)
+ if (atomic_dec_and_test(&bb->done))
+ complete(bb->wait);
bio_put(bio);
}
/**
- * blkdev_issue_zeroout generate number of zero filed write bios
+ * blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
* @sector: start sector
* @nr_sects: number of sectors to write
@@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
int ret;
struct bio *bio;
struct bio_batch bb;
- unsigned int sz, issued = 0;
+ unsigned int sz;
DECLARE_COMPLETION_ONSTACK(wait);
- atomic_set(&bb.done, 0);
+ atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
- bb.end_io = NULL;
submit:
ret = 0;
@@ -185,12 +180,12 @@ submit:
break;
}
ret = 0;
- issued++;
+ atomic_inc(&bb.done);
submit_bio(WRITE, bio);
}
/* Wait for bios in-flight */
- while (issued != atomic_read(&bb.done))
+ if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a89043a3caa4..e36cc10a346c 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -20,6 +20,11 @@ static int throtl_quantum = 32;
/* Throttling is performed over 100ms slice and after that slice is renewed */
static unsigned long throtl_slice = HZ/10; /* 100 ms */
+/* A workqueue to queue throttle related work */
+static struct workqueue_struct *kthrotld_workqueue;
+static void throtl_schedule_delayed_work(struct throtl_data *td,
+ unsigned long delay);
+
struct throtl_rb_root {
struct rb_root rb;
struct rb_node *left;
@@ -345,10 +350,9 @@ static void throtl_schedule_next_dispatch(struct throtl_data *td)
update_min_dispatch_time(st);
if (time_before_eq(st->min_disptime, jiffies))
- throtl_schedule_delayed_work(td->queue, 0);
+ throtl_schedule_delayed_work(td, 0);
else
- throtl_schedule_delayed_work(td->queue,
- (st->min_disptime - jiffies));
+ throtl_schedule_delayed_work(td, (st->min_disptime - jiffies));
}
static inline void
@@ -815,10 +819,10 @@ void blk_throtl_work(struct work_struct *work)
}
/* Call with queue lock held */
-void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay)
+static void
+throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
{
- struct throtl_data *td = q->td;
struct delayed_work *dwork = &td->throtl_work;
if (total_nr_queued(td) > 0) {
@@ -827,12 +831,11 @@ void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay)
* Cancel that and schedule a new one.
*/
__cancel_delayed_work(dwork);
- kblockd_schedule_delayed_work(q, dwork, delay);
+ queue_delayed_work(kthrotld_workqueue, dwork, delay);
throtl_log(td, "schedule work. delay=%lu jiffies=%lu",
delay, jiffies);
}
}
-EXPORT_SYMBOL(throtl_schedule_delayed_work);
static void
throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
@@ -920,7 +923,7 @@ static void throtl_update_blkio_group_read_bps(void *key,
smp_mb__after_atomic_inc();
/* Schedule a work now to process the limit change */
- throtl_schedule_delayed_work(td->queue, 0);
+ throtl_schedule_delayed_work(td, 0);
}
static void throtl_update_blkio_group_write_bps(void *key,
@@ -934,7 +937,7 @@ static void throtl_update_blkio_group_write_bps(void *key,
smp_mb__before_atomic_inc();
atomic_inc(&td->limits_changed);
smp_mb__after_atomic_inc();
- throtl_schedule_delayed_work(td->queue, 0);
+ throtl_schedule_delayed_work(td, 0);
}
static void throtl_update_blkio_group_read_iops(void *key,
@@ -948,7 +951,7 @@ static void throtl_update_blkio_group_read_iops(void *key,
smp_mb__before_atomic_inc();
atomic_inc(&td->limits_changed);
smp_mb__after_atomic_inc();
- throtl_schedule_delayed_work(td->queue, 0);
+ throtl_schedule_delayed_work(td, 0);
}
static void throtl_update_blkio_group_write_iops(void *key,
@@ -962,7 +965,7 @@ static void throtl_update_blkio_group_write_iops(void *key,
smp_mb__before_atomic_inc();
atomic_inc(&td->limits_changed);
smp_mb__after_atomic_inc();
- throtl_schedule_delayed_work(td->queue, 0);
+ throtl_schedule_delayed_work(td, 0);
}
void throtl_shutdown_timer_wq(struct request_queue *q)
@@ -1135,6 +1138,10 @@ void blk_throtl_exit(struct request_queue *q)
static int __init throtl_init(void)
{
+ kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0);
+ if (!kthrotld_workqueue)
+ panic("Failed to create kthrotld\n");
+
blkio_policy_register(&blkio_policy_throtl);
return 0;
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7be4c7959625..ea83a4f0c27d 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
cfqd->busy_queues > 1) {
cfq_del_timer(cfqd, cfqq);
cfq_clear_cfqq_wait_request(cfqq);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
} else {
cfq_blkiocg_update_idle_time_stats(
&cfqq->cfqg->blkg);
@@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* this new queue is RT and the current one is BE
*/
cfq_preempt_queue(cfqd, cfqq);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
}
}
@@ -3731,7 +3731,7 @@ static void cfq_kick_queue(struct work_struct *work)
struct request_queue *q = cfqd->queue;
spin_lock_irq(q->queue_lock);
- __blk_run_queue(cfqd->queue);
+ __blk_run_queue(cfqd->queue, false);
spin_unlock_irq(q->queue_lock);
}
diff --git a/block/elevator.c b/block/elevator.c
index 2569512830d3..236e93c1f46c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_queue *q)
*/
elv_drain_elevator(q);
while (q->rq.elvpriv) {
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
@@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
* with anything. There's no point in delaying queue
* processing.
*/
- __blk_run_queue(q);
+ __blk_run_queue(q, false);
break;
case ELEVATOR_INSERT_SORT:
diff --git a/block/genhd.c b/block/genhd.c
index 6a5b772aa201..cbf1112a885c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1355,7 +1355,7 @@ int invalidate_partition(struct gendisk *disk, int partno)
struct block_device *bdev = bdget_disk(disk, partno);
if (bdev) {
fsync_bdev(bdev);
- res = __invalidate_device(bdev);
+ res = __invalidate_device(bdev, true);
bdput(bdev);
}
return res;
diff --git a/block/ioctl.c b/block/ioctl.c
index 9049d460fa89..1124cd297263 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -294,9 +294,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return -EINVAL;
if (get_user(n, (int __user *) arg))
return -EFAULT;
- if (!(mode & FMODE_EXCL) &&
- blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
- return -EBUSY;
+ if (!(mode & FMODE_EXCL)) {
+ bdgrab(bdev);
+ if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
+ return -EBUSY;
+ }
ret = set_blocksize(bdev, n);
if (!(mode & FMODE_EXCL))
blkdev_put(bdev, mode | FMODE_EXCL);
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index a854df2a5a4b..fdc67d38660b 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -141,8 +141,7 @@ err:
if (walk->iv != req->info)
memcpy(req->info, walk->iv, tfm->crt_ablkcipher.ivsize);
- if (walk->iv_buffer)
- kfree(walk->iv_buffer);
+ kfree(walk->iv_buffer);
return err;
}
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9aac5e58be94..e912ea5def3d 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -146,7 +146,8 @@ static void test_cipher_speed(const char *algo, int enc, unsigned int sec,
unsigned int tcount, u8 *keysize)
{
unsigned int ret, i, j, iv_len;
- const char *key, iv[128];
+ const char *key;
+ char iv[128];
struct crypto_blkcipher *tfm;
struct blkcipher_desc desc;
const char *e;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 27ea9fe9476f..2854865f2434 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2077,6 +2077,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "ghash",
.test = alg_test_hash,
+ .fips_allowed = 1,
.suite = {
.hash = {
.vecs = ghash_tv_template,
@@ -2453,6 +2454,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "xts(aes)",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = {
.enc = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 834af7f2adee..aa6dac05f843 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -451,8 +451,9 @@ static struct hash_testvec rmd320_tv_template[] = {
/*
* SHA1 test vectors from from FIPS PUB 180-1
+ * Long vector from CAVS 5.0
*/
-#define SHA1_TEST_VECTORS 2
+#define SHA1_TEST_VECTORS 3
static struct hash_testvec sha1_tv_template[] = {
{
@@ -467,6 +468,33 @@ static struct hash_testvec sha1_tv_template[] = {
"\x4a\xa1\xf9\x51\x29\xe5\xe5\x46\x70\xf1",
.np = 2,
.tap = { 28, 28 }
+ }, {
+ .plaintext = "\xec\x29\x56\x12\x44\xed\xe7\x06"
+ "\xb6\xeb\x30\xa1\xc3\x71\xd7\x44"
+ "\x50\xa1\x05\xc3\xf9\x73\x5f\x7f"
+ "\xa9\xfe\x38\xcf\x67\xf3\x04\xa5"
+ "\x73\x6a\x10\x6e\x92\xe1\x71\x39"
+ "\xa6\x81\x3b\x1c\x81\xa4\xf3\xd3"
+ "\xfb\x95\x46\xab\x42\x96\xfa\x9f"
+ "\x72\x28\x26\xc0\x66\x86\x9e\xda"
+ "\xcd\x73\xb2\x54\x80\x35\x18\x58"
+ "\x13\xe2\x26\x34\xa9\xda\x44\x00"
+ "\x0d\x95\xa2\x81\xff\x9f\x26\x4e"
+ "\xcc\xe0\xa9\x31\x22\x21\x62\xd0"
+ "\x21\xcc\xa2\x8d\xb5\xf3\xc2\xaa"
+ "\x24\x94\x5a\xb1\xe3\x1c\xb4\x13"
+ "\xae\x29\x81\x0f\xd7\x94\xca\xd5"
+ "\xdf\xaf\x29\xec\x43\xcb\x38\xd1"
+ "\x98\xfe\x4a\xe1\xda\x23\x59\x78"
+ "\x02\x21\x40\x5b\xd6\x71\x2a\x53"
+ "\x05\xda\x4b\x1b\x73\x7f\xce\x7c"
+ "\xd2\x1c\x0e\xb7\x72\x8d\x08\x23"
+ "\x5a\x90\x11",
+ .psize = 163,
+ .digest = "\x97\x01\x11\xc4\xe7\x7b\xcc\x88\xcc\x20"
+ "\x45\x9c\x02\xb6\x9b\x4a\xa8\xf5\x82\x17",
+ .np = 4,
+ .tap = { 63, 64, 31, 5 }
}
};
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 2aa042a5da6d..3a17ca5fff6f 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -7,7 +7,6 @@ menuconfig ACPI
depends on !IA64_HP_SIM
depends on IA64 || X86
depends on PCI
- depends on PM
select PNP
default y
help
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 54784bb42cec..edc25867ad9d 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -416,10 +416,15 @@ struct acpi_gpe_handler_info {
u8 originally_enabled; /* True if GPE was originally enabled */
};
+struct acpi_gpe_notify_object {
+ struct acpi_namespace_node *node;
+ struct acpi_gpe_notify_object *next;
+};
+
union acpi_gpe_dispatch_info {
struct acpi_namespace_node *method_node; /* Method node for this GPE level */
struct acpi_gpe_handler_info *handler; /* Installed GPE handler */
- struct acpi_namespace_node *device_node; /* Parent _PRW device for implicit notify */
+ struct acpi_gpe_notify_object device; /* List of _PRW devices for implicit notify */
};
/*
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 14988a86066f..f4725212eb48 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -457,6 +457,7 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
acpi_status status;
struct acpi_gpe_event_info *local_gpe_event_info;
struct acpi_evaluate_info *info;
+ struct acpi_gpe_notify_object *notify_object;
ACPI_FUNCTION_TRACE(ev_asynch_execute_gpe_method);
@@ -508,10 +509,18 @@ static void ACPI_SYSTEM_XFACE acpi_ev_asynch_execute_gpe_method(void *context)
* from this thread -- because handlers may in turn run other
* control methods.
*/
- status =
- acpi_ev_queue_notify_request(local_gpe_event_info->dispatch.
- device_node,
- ACPI_NOTIFY_DEVICE_WAKE);
+ status = acpi_ev_queue_notify_request(
+ local_gpe_event_info->dispatch.device.node,
+ ACPI_NOTIFY_DEVICE_WAKE);
+
+ notify_object = local_gpe_event_info->dispatch.device.next;
+ while (ACPI_SUCCESS(status) && notify_object) {
+ status = acpi_ev_queue_notify_request(
+ notify_object->node,
+ ACPI_NOTIFY_DEVICE_WAKE);
+ notify_object = notify_object->next;
+ }
+
break;
case ACPI_GPE_DISPATCH_METHOD:
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 3b20a3401b64..52aaff3df562 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -198,7 +198,9 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
acpi_status status = AE_BAD_PARAMETER;
struct acpi_gpe_event_info *gpe_event_info;
struct acpi_namespace_node *device_node;
+ struct acpi_gpe_notify_object *notify_object;
acpi_cpu_flags flags;
+ u8 gpe_dispatch_mask;
ACPI_FUNCTION_TRACE(acpi_setup_gpe_for_wake);
@@ -221,27 +223,49 @@ acpi_setup_gpe_for_wake(acpi_handle wake_device,
goto unlock_and_exit;
}
+ if (wake_device == ACPI_ROOT_OBJECT) {
+ goto out;
+ }
+
/*
* If there is no method or handler for this GPE, then the
* wake_device will be notified whenever this GPE fires (aka
* "implicit notify") Note: The GPE is assumed to be
* level-triggered (for windows compatibility).
*/
- if (((gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK) ==
- ACPI_GPE_DISPATCH_NONE) && (wake_device != ACPI_ROOT_OBJECT)) {
+ gpe_dispatch_mask = gpe_event_info->flags & ACPI_GPE_DISPATCH_MASK;
+ if (gpe_dispatch_mask != ACPI_GPE_DISPATCH_NONE
+ && gpe_dispatch_mask != ACPI_GPE_DISPATCH_NOTIFY) {
+ goto out;
+ }
- /* Validate wake_device is of type Device */
+ /* Validate wake_device is of type Device */
- device_node = ACPI_CAST_PTR(struct acpi_namespace_node,
- wake_device);
- if (device_node->type != ACPI_TYPE_DEVICE) {
- goto unlock_and_exit;
- }
+ device_node = ACPI_CAST_PTR(struct acpi_namespace_node, wake_device);
+ if (device_node->type != ACPI_TYPE_DEVICE) {
+ goto unlock_and_exit;
+ }
+
+ if (gpe_dispatch_mask == ACPI_GPE_DISPATCH_NONE) {
gpe_event_info->flags = (ACPI_GPE_DISPATCH_NOTIFY |
ACPI_GPE_LEVEL_TRIGGERED);
- gpe_event_info->dispatch.device_node = device_node;
+ gpe_event_info->dispatch.device.node = device_node;
+ gpe_event_info->dispatch.device.next = NULL;
+ } else {
+ /* There are multiple devices to notify implicitly. */
+
+ notify_object = ACPI_ALLOCATE_ZEROED(sizeof(*notify_object));
+ if (!notify_object) {
+ status = AE_NO_MEMORY;
+ goto unlock_and_exit;
+ }
+
+ notify_object->node = device_node;
+ notify_object->next = gpe_event_info->dispatch.device.next;
+ gpe_event_info->dispatch.device.next = notify_object;
}
+ out:
gpe_event_info->flags |= ACPI_GPE_CAN_WAKE;
status = AE_OK;
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 7ced61f39492..9749980ca6ca 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -40,6 +40,7 @@
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include <linux/dmi.h>
+#include <linux/suspend.h>
#include "internal.h"
@@ -1006,8 +1007,7 @@ struct kobject *acpi_kobj;
static int __init acpi_init(void)
{
- int result = 0;
-
+ int result;
if (acpi_disabled) {
printk(KERN_INFO PREFIX "Interpreter disabled.\n");
@@ -1022,29 +1022,18 @@ static int __init acpi_init(void)
init_acpi_device_notify();
result = acpi_bus_init();
-
- if (!result) {
- pci_mmcfg_late_init();
- if (!(pm_flags & PM_APM))
- pm_flags |= PM_ACPI;
- else {
- printk(KERN_INFO PREFIX
- "APM is already active, exiting\n");
- disable_acpi();
- result = -ENODEV;
- }
- } else
+ if (result) {
disable_acpi();
-
- if (acpi_disabled)
return result;
+ }
+ pci_mmcfg_late_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
acpi_sleep_proc_init();
acpi_wakeup_device_init();
- return result;
+ return 0;
}
subsys_initcall(acpi_init);
diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c
index 5df67f1d6c61..384f7abcff77 100644
--- a/drivers/acpi/debugfs.c
+++ b/drivers/acpi/debugfs.c
@@ -26,7 +26,9 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
size_t count, loff_t *ppos)
{
static char *buf;
- static int uncopied_bytes;
+ static u32 max_size;
+ static u32 uncopied_bytes;
+
struct acpi_table_header table;
acpi_status status;
@@ -37,19 +39,24 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
if (copy_from_user(&table, user_buf,
sizeof(struct acpi_table_header)))
return -EFAULT;
- uncopied_bytes = table.length;
- buf = kzalloc(uncopied_bytes, GFP_KERNEL);
+ uncopied_bytes = max_size = table.length;
+ buf = kzalloc(max_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
}
- if (uncopied_bytes < count) {
- kfree(buf);
+ if (buf == NULL)
+ return -EINVAL;
+
+ if ((*ppos > max_size) ||
+ (*ppos + count > max_size) ||
+ (*ppos + count < count) ||
+ (count > uncopied_bytes))
return -EINVAL;
- }
if (copy_from_user(buf + (*ppos), user_buf, count)) {
kfree(buf);
+ buf = NULL;
return -EFAULT;
}
@@ -59,6 +66,7 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
if (!uncopied_bytes) {
status = acpi_install_method(buf);
kfree(buf);
+ buf = NULL;
if (ACPI_FAILURE(status))
return -EINVAL;
add_taint(TAINT_OVERRIDDEN_ACPI_TABLE);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 5eb25eb3ea48..3b5c3189fd99 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -274,7 +274,7 @@ acpi_table_parse_srat(enum acpi_srat_type id,
int __init acpi_numa_init(void)
{
- int ret = 0;
+ int cnt = 0;
/*
* Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
@@ -288,7 +288,7 @@ int __init acpi_numa_init(void)
acpi_parse_x2apic_affinity, 0);
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
acpi_parse_processor_affinity, 0);
- ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+ cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
acpi_parse_memory_affinity,
NR_NODE_MEMBLKS);
}
@@ -297,7 +297,10 @@ int __init acpi_numa_init(void)
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
acpi_numa_arch_fixup();
- return ret;
+
+ if (cnt <= 0)
+ return cnt ?: -ENOENT;
+ return 0;
}
int acpi_get_pxm(acpi_handle h)
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index c90c76aa7f8b..4a6753009d79 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1589,9 +1589,9 @@ acpi_status __init acpi_os_initialize(void)
acpi_status __init acpi_os_initialize1(void)
{
- kacpid_wq = create_workqueue("kacpid");
- kacpi_notify_wq = create_workqueue("kacpi_notify");
- kacpi_hotplug_wq = create_workqueue("kacpi_hotplug");
+ kacpid_wq = alloc_workqueue("kacpid", 0, 1);
+ kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1);
+ kacpi_hotplug_wq = alloc_workqueue("kacpi_hotplug", 0, 1);
BUG_ON(!kacpid_wq);
BUG_ON(!kacpi_notify_wq);
BUG_ON(!kacpi_hotplug_wq);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index e9fef94d1039..1850dac8f45c 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -586,7 +586,7 @@ int acpi_suspend(u32 acpi_state)
return -EINVAL;
}
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
/**
* acpi_pm_device_sleep_state - return preferred power state of ACPI device
* in the system sleep state given by %acpi_target_sleep_state
@@ -672,7 +672,7 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
*d_min_p = d_min;
return d_max;
}
-#endif /* CONFIG_PM_OPS */
+#endif /* CONFIG_PM */
#ifdef CONFIG_PM_SLEEP
/**
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index c2328aed0836..75afa75a515e 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -202,6 +202,18 @@ config SATA_DWC
If unsure, say N.
+config SATA_DWC_DEBUG
+ bool "Debugging driver version"
+ depends on SATA_DWC
+ help
+ This option enables debugging output in the driver.
+
+config SATA_DWC_VDEBUG
+ bool "Verbose debug output"
+ depends on SATA_DWC_DEBUG
+ help
+ This option enables the taskfile dumping and NCQ debugging.
+
config SATA_MV
tristate "Marvell SATA support"
help
@@ -299,6 +311,12 @@ config PATA_AMD
If unsure, say N.
+config PATA_ARASAN_CF
+ tristate "ARASAN CompactFlash PATA Controller Support"
+ select DMA_ENGINE
+ help
+ Say Y here to support the ARASAN CompactFlash PATA controller
+
config PATA_ARTOP
tristate "ARTOP 6210/6260 PATA support"
depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 27291aad6ca7..8ac64e1aa051 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o
# SFF w/ custom DMA
obj-$(CONFIG_PDC_ADMA) += pdc_adma.o
+obj-$(CONFIG_PATA_ARASAN_CF) += pata_arasan_cf.o
obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o
obj-$(CONFIG_SATA_QSTOR) += sata_qstor.o
obj-$(CONFIG_SATA_SX4) += sata_sx4.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index b8d96ce37fc9..e62f693be8ea 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -175,8 +175,7 @@ static const struct ata_port_info ahci_port_info[] = {
{
AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI |
AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP),
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &ahci_ops,
@@ -260,6 +259,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */
{ PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */
{ PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */
+ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG RAID */
{ PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
@@ -383,6 +383,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
.class = PCI_CLASS_STORAGE_SATA_AHCI,
.class_mask = 0xffffff,
.driver_data = board_ahci_yes_fbs }, /* 88se9128 */
+ { PCI_DEVICE(0x1b4b, 0x9125),
+ .driver_data = board_ahci_yes_fbs }, /* 88se9125 */
+ { PCI_DEVICE(0x1b4b, 0x91a3),
+ .driver_data = board_ahci_yes_fbs },
/* Promise */
{ PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 3e606c34f57b..ccaf08122058 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -213,10 +213,8 @@ enum {
/* ap->flags bits */
- AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
- ATA_FLAG_ACPI_SATA | ATA_FLAG_AN |
- ATA_FLAG_LPM,
+ AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+ ATA_FLAG_ACPI_SATA | ATA_FLAG_AN,
ICH_MAP = 0x90, /* ICH MAP register */
diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
index 6981f7680a00..721d38bfa339 100644
--- a/drivers/ata/ata_generic.c
+++ b/drivers/ata/ata_generic.c
@@ -237,7 +237,7 @@ static struct pci_device_id ata_generic[] = {
#endif
/* Intel, IDE class device */
{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL,
+ PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL,
.driver_data = ATA_GEN_INTEL_IDER },
/* Must come last. If you add entries adjust this table appropriately */
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL),
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 6cb14ca8ee85..cdec4ab3b159 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -230,7 +230,7 @@ static const struct pci_device_id piix_pci_tbl[] = {
{ 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 },
/* SATA ports */
-
+
/* 82801EB (ICH5) */
{ 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata },
/* 82801EB (ICH5) */
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 8b5ea399a4f4..a791b8ce6294 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -660,8 +660,7 @@ static int ata_acpi_filter_tf(struct ata_device *dev,
* @dev: target ATA device
* @gtf: raw ATA taskfile register set (0x1f1 - 0x1f7)
*
- * Outputs ATA taskfile to standard ATA host controller using MMIO
- * or PIO as indicated by the ATA_FLAG_MMIO flag.
+ * Outputs ATA taskfile to standard ATA host controller.
* Writes the control, feature, nsect, lbal, lbam, and lbah registers.
* Optionally (ATA_TFLAG_LBA48) writes hob_feature, hob_nsect,
* hob_lbal, hob_lbam, and hob_lbah.
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d4e52e214859..b91e19cab102 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4210,7 +4210,7 @@ static int glob_match (const char *text, const char *pattern)
return 0; /* End of both strings: match */
return 1; /* No match */
}
-
+
static unsigned long ata_dev_blacklisted(const struct ata_device *dev)
{
unsigned char model_num[ATA_ID_PROD_LEN + 1];
@@ -5479,7 +5479,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
ap = kzalloc(sizeof(*ap), GFP_KERNEL);
if (!ap)
return NULL;
-
+
ap->pflags |= ATA_PFLAG_INITIALIZING;
ap->lock = &host->lock;
ap->print_id = -1;
@@ -5887,21 +5887,9 @@ void ata_host_init(struct ata_host *host, struct device *dev,
host->ops = ops;
}
-
-static void async_port_probe(void *data, async_cookie_t cookie)
+int ata_port_probe(struct ata_port *ap)
{
- int rc;
- struct ata_port *ap = data;
-
- /*
- * If we're not allowed to scan this host in parallel,
- * we need to wait until all previous scans have completed
- * before going further.
- * Jeff Garzik says this is only within a controller, so we
- * don't need to wait for port 0, only for later ports.
- */
- if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
- async_synchronize_cookie(cookie);
+ int rc = 0;
/* probe */
if (ap->ops->error_handler) {
@@ -5927,23 +5915,33 @@ static void async_port_probe(void *data, async_cookie_t cookie)
DPRINTK("ata%u: bus probe begin\n", ap->print_id);
rc = ata_bus_probe(ap);
DPRINTK("ata%u: bus probe end\n", ap->print_id);
-
- if (rc) {
- /* FIXME: do something useful here?
- * Current libata behavior will
- * tear down everything when
- * the module is removed
- * or the h/w is unplugged.
- */
- }
}
+ return rc;
+}
+
+
+static void async_port_probe(void *data, async_cookie_t cookie)
+{
+ struct ata_port *ap = data;
+
+ /*
+ * If we're not allowed to scan this host in parallel,
+ * we need to wait until all previous scans have completed
+ * before going further.
+ * Jeff Garzik says this is only within a controller, so we
+ * don't need to wait for port 0, only for later ports.
+ */
+ if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
+ async_synchronize_cookie(cookie);
+
+ (void)ata_port_probe(ap);
/* in order to keep device order, we need to synchronize at this point */
async_synchronize_cookie(cookie);
ata_scsi_scan_host(ap, 1);
-
}
+
/**
* ata_host_register - register initialized ATA host
* @host: ATA host to register
@@ -5983,7 +5981,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht)
for (i = 0; i < host->n_ports; i++)
host->ports[i]->print_id = ata_print_id++;
-
+
/* Create associated sysfs transport objects */
for (i = 0; i < host->n_ports; i++) {
rc = ata_tport_add(host->dev,host->ports[i]);
@@ -6471,7 +6469,7 @@ static int __init ata_init(void)
ata_sff_exit();
rc = -ENOMEM;
goto err_out;
- }
+ }
printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n");
return 0;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 17a637877d03..df3f3140c9c7 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -587,11 +587,43 @@ static void ata_eh_unload(struct ata_port *ap)
void ata_scsi_error(struct Scsi_Host *host)
{
struct ata_port *ap = ata_shost_to_port(host);
- int i;
unsigned long flags;
+ LIST_HEAD(eh_work_q);
DPRINTK("ENTER\n");
+ spin_lock_irqsave(host->host_lock, flags);
+ list_splice_init(&host->eh_cmd_q, &eh_work_q);
+ spin_unlock_irqrestore(host->host_lock, flags);
+
+ ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
+
+ /* If we timed raced normal completion and there is nothing to
+ recover nr_timedout == 0 why exactly are we doing error recovery ? */
+ ata_scsi_port_error_handler(host, ap);
+
+ /* finish or retry handled scmd's and clean up */
+ WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+
+ DPRINTK("EXIT\n");
+}
+
+/**
+ * ata_scsi_cmd_error_handler - error callback for a list of commands
+ * @host: scsi host containing the port
+ * @ap: ATA port within the host
+ * @eh_work_q: list of commands to process
+ *
+ * process the given list of commands and return those finished to the
+ * ap->eh_done_q. This function is the first part of the libata error
+ * handler which processes a given list of failed commands.
+ */
+void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
+ struct list_head *eh_work_q)
+{
+ int i;
+ unsigned long flags;
+
/* make sure sff pio task is not running */
ata_sff_flush_pio_task(ap);
@@ -627,7 +659,7 @@ void ata_scsi_error(struct Scsi_Host *host)
if (ap->ops->lost_interrupt)
ap->ops->lost_interrupt(ap);
- list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
+ list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
struct ata_queued_cmd *qc;
for (i = 0; i < ATA_MAX_QUEUE; i++) {
@@ -671,8 +703,20 @@ void ata_scsi_error(struct Scsi_Host *host)
} else
spin_unlock_wait(ap->lock);
- /* If we timed raced normal completion and there is nothing to
- recover nr_timedout == 0 why exactly are we doing error recovery ? */
+}
+EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
+
+/**
+ * ata_scsi_port_error_handler - recover the port after the commands
+ * @host: SCSI host containing the port
+ * @ap: the ATA port
+ *
+ * Handle the recovery of the port @ap after all the commands
+ * have been recovered.
+ */
+void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
+{
+ unsigned long flags;
/* invoke error handler */
if (ap->ops->error_handler) {
@@ -761,9 +805,6 @@ void ata_scsi_error(struct Scsi_Host *host)
ap->ops->eng_timeout(ap);
}
- /* finish or retry handled scmd's and clean up */
- WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
-
scsi_eh_flush_done_q(&ap->eh_done_q);
/* clean up */
@@ -784,9 +825,8 @@ void ata_scsi_error(struct Scsi_Host *host)
wake_up_all(&ap->eh_wait_q);
spin_unlock_irqrestore(ap->lock, flags);
-
- DPRINTK("EXIT\n");
}
+EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);
/**
* ata_port_wait_eh - Wait for the currently pending EH to complete
@@ -1618,7 +1658,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
* host links. For disabled PMP links, only N bit is
* considered as X bit is left at 1 for link plugging.
*/
- if (link->lpm_policy != ATA_LPM_MAX_POWER)
+ if (link->lpm_policy > ATA_LPM_MAX_POWER)
hotplug_mask = 0; /* hotplug doesn't work w/ LPM */
else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 600f6353ecf8..a83419991357 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2056,6 +2056,17 @@ static unsigned int ata_scsiop_inq_83(struct ata_scsi_args *args, u8 *rbuf)
ATA_ID_SERNO_LEN);
num += ATA_ID_SERNO_LEN;
+ if (ata_id_has_wwn(args->id)) {
+ /* SAT defined lu world wide name */
+ /* piv=0, assoc=lu, code_set=binary, designator=NAA */
+ rbuf[num + 0] = 1;
+ rbuf[num + 1] = 3;
+ rbuf[num + 3] = ATA_ID_WWN_LEN;
+ num += 4;
+ ata_id_string(args->id, (unsigned char *) rbuf + num,
+ ATA_ID_WWN, ATA_ID_WWN_LEN);
+ num += ATA_ID_WWN_LEN;
+ }
rbuf[3] = num - 4; /* page len (assume less than 256 bytes) */
return 0;
}
@@ -3759,7 +3770,7 @@ struct ata_port *ata_sas_port_alloc(struct ata_host *host,
return NULL;
ap->port_no = 0;
- ap->lock = shost->host_lock;
+ ap->lock = &host->lock;
ap->pio_mask = port_info->pio_mask;
ap->mwdma_mask = port_info->mwdma_mask;
ap->udma_mask = port_info->udma_mask;
@@ -3821,7 +3832,7 @@ int ata_sas_port_init(struct ata_port *ap)
if (!rc) {
ap->print_id = ata_print_id++;
- rc = ata_bus_probe(ap);
+ rc = ata_port_probe(ap);
}
return rc;
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index af6141bb1ba3..cf7acbc0cfcb 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1302,6 +1302,18 @@ fsm_start:
}
EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
+/*
+ * Queue @work on the ata_sff_wq workqueue. The return value of
+ * queue_work() is discarded. Exported GPL-only.
+ */
+void ata_sff_queue_work(struct work_struct *work)
+{
+ queue_work(ata_sff_wq, work);
+}
+EXPORT_SYMBOL_GPL(ata_sff_queue_work);
+
+/*
+ * Queue @dwork on the ata_sff_wq workqueue after @delay. @delay is handed
+ * to queue_delayed_work() unchanged; ata_sff_queue_pio_task() converts its
+ * millisecond argument with msecs_to_jiffies() before calling this.
+ * The return value of queue_delayed_work() is discarded. Exported GPL-only.
+ */
+void ata_sff_queue_delayed_work(struct delayed_work *dwork, unsigned long delay)
+{
+ queue_delayed_work(ata_sff_wq, dwork, delay);
+}
+EXPORT_SYMBOL_GPL(ata_sff_queue_delayed_work);
+
void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay)
{
struct ata_port *ap = link->ap;
@@ -1311,8 +1323,7 @@ void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay)
ap->sff_pio_task_link = link;
/* may fail if ata_sff_flush_pio_task() in progress */
- queue_delayed_work(ata_sff_wq, &ap->sff_pio_task,
- msecs_to_jiffies(delay));
+ ata_sff_queue_delayed_work(&ap->sff_pio_task, msecs_to_jiffies(delay));
}
EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task);
@@ -1336,7 +1347,7 @@ static void ata_sff_pio_task(struct work_struct *work)
u8 status;
int poll_next;
- BUG_ON(ap->sff_pio_task_link == NULL);
+ BUG_ON(ap->sff_pio_task_link == NULL);
/* qc can be NULL if timeout occurred */
qc = ata_qc_from_tag(ap, link->active_tag);
if (!qc) {
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index a9be110dbf51..773de97988a2 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -103,6 +103,7 @@ extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg);
extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
extern struct ata_port *ata_port_alloc(struct ata_host *host);
extern const char *sata_spd_string(unsigned int spd);
+extern int ata_port_probe(struct ata_port *ap);
/* libata-acpi.c */
#ifdef CONFIG_ATA_ACPI
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index c8d47034d5e9..91949d997555 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -245,7 +245,7 @@ static struct ata_port_operations pacpi_ops = {
static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
{
static const struct ata_port_info info = {
- .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
new file mode 100644
index 000000000000..65cee74605b4
--- /dev/null
+++ b/drivers/ata/pata_arasan_cf.c
@@ -0,0 +1,983 @@
+/*
+ * drivers/ata/pata_arasan_cf.c
+ *
+ * Arasan Compact Flash host controller source file
+ *
+ * Copyright (C) 2011 ST Microelectronics
+ * Viresh Kumar <viresh.kumar@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/*
+ * The Arasan CompactFlash Device Controller IP core has three basic modes of
+ * operation: PC card ATA using I/O mode, PC card ATA using memory mode, PC card
+ * ATA using true IDE modes. This driver supports only True IDE mode currently.
+ *
+ * Arasan CF Controller shares global irq register with Arasan XD Controller.
+ *
+ * Tested on arch/arm/mach-spear13xx
+ */
+
+#include <linux/ata.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/libata.h>
+#include <linux/module.h>
+#include <linux/pata_arasan_cf_data.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#define DRIVER_NAME "arasan_cf"
+#define TIMEOUT msecs_to_jiffies(3000)
+
+/* Registers */
+/* CompactFlash Interface Status */
+#define CFI_STS 0x000
+ #define STS_CHG (1)
+ #define BIN_AUDIO_OUT (1 << 1)
+ #define CARD_DETECT1 (1 << 2)
+ #define CARD_DETECT2 (1 << 3)
+ #define INP_ACK (1 << 4)
+ #define CARD_READY (1 << 5)
+ #define IO_READY (1 << 6)
+ #define B16_IO_PORT_SEL (1 << 7)
+/* IRQ */
+#define IRQ_STS 0x004
+/* Interrupt Enable */
+#define IRQ_EN 0x008
+ #define CARD_DETECT_IRQ (1)
+ #define STATUS_CHNG_IRQ (1 << 1)
+ #define MEM_MODE_IRQ (1 << 2)
+ #define IO_MODE_IRQ (1 << 3)
+ #define TRUE_IDE_MODE_IRQ (1 << 8)
+ #define PIO_XFER_ERR_IRQ (1 << 9)
+ #define BUF_AVAIL_IRQ (1 << 10)
+ #define XFER_DONE_IRQ (1 << 11)
+ #define IGNORED_IRQS (STATUS_CHNG_IRQ | MEM_MODE_IRQ | IO_MODE_IRQ |\
+ TRUE_IDE_MODE_IRQ)
+ #define TRUE_IDE_IRQS (CARD_DETECT_IRQ | PIO_XFER_ERR_IRQ |\
+ BUF_AVAIL_IRQ | XFER_DONE_IRQ)
+/* Operation Mode */
+#define OP_MODE 0x00C
+ #define CARD_MODE_MASK (0x3)
+ #define MEM_MODE (0x0)
+ #define IO_MODE (0x1)
+ #define TRUE_IDE_MODE (0x2)
+
+ #define CARD_TYPE_MASK (1 << 2)
+ #define CF_CARD (0)
+ #define CF_PLUS_CARD (1 << 2)
+
+ #define CARD_RESET (1 << 3)
+ #define CFHOST_ENB (1 << 4)
+ #define OUTPUTS_TRISTATE (1 << 5)
+ #define ULTRA_DMA_ENB (1 << 8)
+ #define MULTI_WORD_DMA_ENB (1 << 9)
+ #define DRQ_BLOCK_SIZE_MASK (0x3 << 11)
+ #define DRQ_BLOCK_SIZE_512 (0)
+ #define DRQ_BLOCK_SIZE_1024 (1 << 11)
+ #define DRQ_BLOCK_SIZE_2048 (2 << 11)
+ #define DRQ_BLOCK_SIZE_4096 (3 << 11)
+/* CF Interface Clock Configuration */
+#define CLK_CFG 0x010
+ #define CF_IF_CLK_MASK (0XF)
+/* CF Timing Mode Configuration */
+#define TM_CFG 0x014
+ #define MEM_MODE_TIMING_MASK (0x3)
+ #define MEM_MODE_TIMING_250NS (0x0)
+ #define MEM_MODE_TIMING_120NS (0x1)
+ #define MEM_MODE_TIMING_100NS (0x2)
+ #define MEM_MODE_TIMING_80NS (0x3)
+
+ #define IO_MODE_TIMING_MASK (0x3 << 2)
+ #define IO_MODE_TIMING_250NS (0x0 << 2)
+ #define IO_MODE_TIMING_120NS (0x1 << 2)
+ #define IO_MODE_TIMING_100NS (0x2 << 2)
+ #define IO_MODE_TIMING_80NS (0x3 << 2)
+
+ #define TRUEIDE_PIO_TIMING_MASK (0x7 << 4)
+ #define TRUEIDE_PIO_TIMING_SHIFT 4
+
+ #define TRUEIDE_MWORD_DMA_TIMING_MASK (0x7 << 7)
+ #define TRUEIDE_MWORD_DMA_TIMING_SHIFT 7
+
+ #define ULTRA_DMA_TIMING_MASK (0x7 << 10)
+ #define ULTRA_DMA_TIMING_SHIFT 10
+/* CF Transfer Address */
+/*
+ * NOTE(review): 0x014 is the same offset as TM_CFG above, and the next
+ * register (XFER_CTR) is at 0x01C. XFER_ADDR is not referenced by the code
+ * in this file, so this is harmless today -- confirm the real offset
+ * against the controller datasheet before using it.
+ */
+#define XFER_ADDR 0x014
+ #define XFER_ADDR_MASK (0x7FF)
+ #define MAX_XFER_COUNT 0x20000u
+/* Transfer Control */
+#define XFER_CTR 0x01C
+ #define XFER_COUNT_MASK (0x3FFFF)
+ #define ADDR_INC_DISABLE (1 << 24)
+ #define XFER_WIDTH_MASK (1 << 25)
+ #define XFER_WIDTH_8B (0)
+ #define XFER_WIDTH_16B (1 << 25)
+
+ #define MEM_TYPE_MASK (1 << 26)
+ #define MEM_TYPE_COMMON (0)
+ #define MEM_TYPE_ATTRIBUTE (1 << 26)
+
+ #define MEM_IO_XFER_MASK (1 << 27)
+ #define MEM_XFER (0)
+ #define IO_XFER (1 << 27)
+
+ #define DMA_XFER_MODE (1 << 28)
+
+ #define AHB_BUS_NORMAL_PIO_OPRTN (~(1 << 29))
+ #define XFER_DIR_MASK (1 << 30)
+ #define XFER_READ (0)
+ #define XFER_WRITE (1 << 30)
+
+ #define XFER_START (1 << 31)
+/* Write Data Port */
+#define WRITE_PORT 0x024
+/* Read Data Port */
+#define READ_PORT 0x028
+/* ATA Data Port */
+#define ATA_DATA_PORT 0x030
+ #define ATA_DATA_PORT_MASK (0xFFFF)
+/* ATA Error/Features */
+#define ATA_ERR_FTR 0x034
+/* ATA Sector Count */
+#define ATA_SC 0x038
+/* ATA Sector Number */
+#define ATA_SN 0x03C
+/* ATA Cylinder Low */
+#define ATA_CL 0x040
+/* ATA Cylinder High */
+#define ATA_CH 0x044
+/* ATA Select Card/Head */
+#define ATA_SH 0x048
+/* ATA Status-Command */
+#define ATA_STS_CMD 0x04C
+/* ATA Alternate Status/Device Control */
+#define ATA_ASTS_DCTR 0x050
+/* Extended Write Data Port 0x200-0x3FC */
+#define EXT_WRITE_PORT 0x200
+/* Extended Read Data Port 0x400-0x5FC */
+#define EXT_READ_PORT 0x400
+ #define FIFO_SIZE 0x200u
+/* Global Interrupt Status */
+#define GIRQ_STS 0x800
+/* Global Interrupt Status enable */
+#define GIRQ_STS_EN 0x804
+/* Global Interrupt Signal enable */
+#define GIRQ_SGN_EN 0x808
+ #define GIRQ_CF (1)
+ #define GIRQ_XD (1 << 1)
+
+/*
+ * Compact Flash Controller Dev Structure
+ *
+ * One instance per controller; allocated in arasan_cf_probe() and stored in
+ * ata_host->private_data.
+ */
+struct arasan_cf_dev {
+ /* pointer to ata_host structure */
+ struct ata_host *host;
+ /* clk structure, only if HAVE_CLK is defined */
+#ifdef CONFIG_HAVE_CLK
+ struct clk *clk;
+#endif
+
+ /* physical base address of controller (start of the MEM resource) */
+ dma_addr_t pbase;
+ /* virtual (ioremapped) base address of controller */
+ void __iomem *vbase;
+ /* irq number as returned by platform_get_irq() */
+ int irq;
+
+ /*
+  * DMA transfer status: cleared in arasan_cf_qc_issue(), set to
+  * ATA_DMA_ERR by the PIO_XFER_ERR_IRQ path and by arasan_cf_freeze(),
+  * checked in wait4buf().
+  */
+ u8 dma_status;
+ /* 1 while a card is detected, 0 otherwise; see cf_card_detect() */
+ u8 card_present;
+
+ /* dma specific */
+ /*
+  * Completed from arasan_cf_interrupt() (buffer available, PIO transfer
+  * error, and transfer done for writes); waited on in wait4buf().
+  */
+ struct completion cf_completion;
+ /* Completed from dma_callback(); waited on in dma_xfer(). */
+ struct completion dma_completion;
+ /* DMA channel requested in data_xfer() for the current transfer */
+ struct dma_chan *dma_chan;
+ /* Capability mask for dma_request_channel(); DMA_MEMCPY is set in probe */
+ dma_cap_mask_t mask;
+ /* platform-supplied data handed to the channel filter (chan->private) */
+ void *dma_priv;
+ /* DMA transfer work, runs data_xfer() */
+ struct work_struct work;
+ /* DMA delayed finish work, runs delayed_finish() */
+ struct delayed_work dwork;
+ /* qc being transferred using DMA; NULL once dma_complete() has run */
+ struct ata_queued_cmd *qc;
+};
+
+/*
+ * SCSI host template: the ATA_BASE_SHT() defaults, with sg_tablesize set to
+ * SG_NONE and a 32-bit dma_boundary.
+ */
+static struct scsi_host_template arasan_cf_sht = {
+ ATA_BASE_SHT(DRIVER_NAME),
+ .sg_tablesize = SG_NONE,
+ .dma_boundary = 0xFFFFFFFFUL,
+};
+
+/*
+ * Dump the status, interrupt, mode, clock, timing, transfer-control and
+ * global-interrupt registers through dev_dbg(). Called from data_xfer()
+ * when a scatterlist transfer fails. Each readl() is a real register
+ * access, independent of whether the debug output is enabled.
+ */
+static void cf_dumpregs(struct arasan_cf_dev *acdev)
+{
+ struct device *dev = acdev->host->dev;
+
+ dev_dbg(dev, ": =========== REGISTER DUMP ===========");
+ dev_dbg(dev, ": CFI_STS: %x", readl(acdev->vbase + CFI_STS));
+ dev_dbg(dev, ": IRQ_STS: %x", readl(acdev->vbase + IRQ_STS));
+ dev_dbg(dev, ": IRQ_EN: %x", readl(acdev->vbase + IRQ_EN));
+ dev_dbg(dev, ": OP_MODE: %x", readl(acdev->vbase + OP_MODE));
+ dev_dbg(dev, ": CLK_CFG: %x", readl(acdev->vbase + CLK_CFG));
+ dev_dbg(dev, ": TM_CFG: %x", readl(acdev->vbase + TM_CFG));
+ dev_dbg(dev, ": XFER_CTR: %x", readl(acdev->vbase + XFER_CTR));
+ dev_dbg(dev, ": GIRQ_STS: %x", readl(acdev->vbase + GIRQ_STS));
+ dev_dbg(dev, ": GIRQ_STS_EN: %x", readl(acdev->vbase + GIRQ_STS_EN));
+ dev_dbg(dev, ": GIRQ_SGN_EN: %x", readl(acdev->vbase + GIRQ_SGN_EN));
+ dev_dbg(dev, ": =====================================");
+}
+
+/*
+ * Switch the CF bit of the global interrupt block (shared between the CF
+ * and XD controllers) on or off. Both the status-enable and signal-enable
+ * registers are written with the whole value, i.e. GIRQ_CF or 0.
+ */
+static void cf_ginterrupt_enable(struct arasan_cf_dev *acdev, bool enable)
+{
+	void __iomem *base = acdev->vbase;
+	u32 girq = enable ? GIRQ_CF : 0;
+
+	writel(girq, base + GIRQ_STS_EN);
+	writel(girq, base + GIRQ_SGN_EN);
+}
+
+/*
+ * Enable or disable the CF interrupt sources selected by @mask.
+ * When enabling, the matching bits are first written to IRQ_STS to clear
+ * them, then set in IRQ_EN. When disabling, only IRQ_EN is touched.
+ */
+static inline void
+cf_interrupt_enable(struct arasan_cf_dev *acdev, u32 mask, bool enable)
+{
+	void __iomem *irq_en = acdev->vbase + IRQ_EN;
+	u32 cur = readl(irq_en);
+
+	if (!enable) {
+		writel(cur & ~mask, irq_en);
+		return;
+	}
+
+	writel(mask, acdev->vbase + IRQ_STS);
+	writel(cur | mask, irq_en);
+}
+
+/*
+ * Pulse the CARD_RESET bit in OP_MODE: set it, busy-wait 200us, clear it.
+ * The other OP_MODE bits are restored from the value read on entry.
+ */
+static inline void cf_card_reset(struct arasan_cf_dev *acdev)
+{
+	void __iomem *op_mode = acdev->vbase + OP_MODE;
+	u32 mode = readl(op_mode);
+
+	writel(mode | CARD_RESET, op_mode);
+	udelay(200);
+	writel(mode & ~CARD_RESET, op_mode);
+}
+
+/*
+ * Reset the host controller by toggling CFHOST_ENB in OP_MODE off and back
+ * on. OP_MODE is re-read before each write.
+ */
+static inline void cf_ctrl_reset(struct arasan_cf_dev *acdev)
+{
+	void __iomem *op_mode = acdev->vbase + OP_MODE;
+	u32 mode;
+
+	mode = readl(op_mode);
+	writel(mode & ~CFHOST_ENB, op_mode);
+
+	mode = readl(op_mode);
+	writel(mode | CFHOST_ENB, op_mode);
+}
+
+/*
+ * Track card insertion/removal from the card-detect bits of CFI_STS.
+ *
+ * Nothing happens unless the presence state changed. On insertion the card
+ * is reset. If @hotplugged is set (interrupt path), the change is reported
+ * to libata EH and the port is frozen; probe passes false.
+ */
+static void cf_card_detect(struct arasan_cf_dev *acdev, bool hotplugged)
+{
+	struct ata_port *ap = acdev->host->ports[0];
+	struct ata_eh_info *ehi = &ap->link.eh_info;
+	u32 sts = readl(acdev->vbase + CFI_STS);
+	/* Both CD1 & CD2 read low once the card is fully inserted */
+	bool inserted = !(sts & (CARD_DETECT1 | CARD_DETECT2));
+
+	if (inserted == !!acdev->card_present)
+		return;
+
+	acdev->card_present = inserted ? 1 : 0;
+	if (inserted)
+		cf_card_reset(acdev);
+
+	if (!hotplugged)
+		return;
+
+	ata_ehi_hotplugged(ehi);
+	ata_port_freeze(ap);
+}
+
+/*
+ * Bring the controller up: enable its clock (when CONFIG_HAVE_CLK), then,
+ * under the host lock, program the interface clock from platform data,
+ * select True IDE mode with the host enabled, and unmask the card-detect
+ * and global CF interrupts.
+ *
+ * Returns 0 on success or the clk_enable() error.
+ */
+static int cf_init(struct arasan_cf_dev *acdev)
+{
+	struct arasan_cf_pdata *pdata = dev_get_platdata(acdev->host->dev);
+	void __iomem *base = acdev->vbase;
+	unsigned long flags;
+	u32 if_clk;
+	int ret = 0;
+
+#ifdef CONFIG_HAVE_CLK
+	ret = clk_enable(acdev->clk);
+	if (ret) {
+		dev_dbg(acdev->host->dev, "clock enable failed");
+		return ret;
+	}
+#endif
+
+	/* values above CF_IF_CLK_200M fall back to CF_IF_CLK_166M */
+	if_clk = (pdata->cf_if_clk <= CF_IF_CLK_200M) ? pdata->cf_if_clk :
+		CF_IF_CLK_166M;
+
+	spin_lock_irqsave(&acdev->host->lock, flags);
+	writel(if_clk, base + CLK_CFG);
+	writel(TRUE_IDE_MODE | CFHOST_ENB, base + OP_MODE);
+	cf_interrupt_enable(acdev, CARD_DETECT_IRQ, 1);
+	cf_ginterrupt_enable(acdev, 1);
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+	return ret;
+}
+
+/*
+ * Shut the controller down (counterpart of cf_init()): under the host lock
+ * mask the global and True IDE interrupts, reset the card and clear
+ * CFHOST_ENB; then disable the clock (when CONFIG_HAVE_CLK).
+ */
+static void cf_exit(struct arasan_cf_dev *acdev)
+{
+	void __iomem *op_mode = acdev->vbase + OP_MODE;
+	unsigned long flags;
+	u32 mode;
+
+	spin_lock_irqsave(&acdev->host->lock, flags);
+	cf_ginterrupt_enable(acdev, 0);
+	cf_interrupt_enable(acdev, TRUE_IDE_IRQS, 0);
+	cf_card_reset(acdev);
+	mode = readl(op_mode);
+	writel(mode & ~CFHOST_ENB, op_mode);
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+#ifdef CONFIG_HAVE_CLK
+	clk_disable(acdev->clk);
+#endif
+}
+
+/*
+ * DMA descriptor completion callback (installed by dma_xfer()); @dev is the
+ * arasan_cf_dev. Wakes the waiter on dma_completion.
+ */
+static void dma_callback(void *dev)
+{
+	struct arasan_cf_dev *acdev = dev;
+
+	complete(&acdev->dma_completion);
+}
+
+/*
+ * Channel filter passed to dma_request_channel() by data_xfer().
+ * Accepts every channel it is offered and stores @slave (the platform's
+ * dma_priv) in chan->private.
+ */
+static bool filter(struct dma_chan *chan, void *slave)
+{
+ chan->private = slave;
+ return true;
+}
+
+/*
+ * Finish the DMA command tracked in acdev->qc: forget the qc, let the SFF
+ * interrupt handler run the completion path, then, if the command ended
+ * with an error on a DMA protocol, add a description to the link's EH info.
+ */
+static inline void dma_complete(struct arasan_cf_dev *acdev)
+{
+ struct ata_queued_cmd *qc = acdev->qc;
+ unsigned long flags;
+
+ /* cleared before the handler runs; later code must use the local qc */
+ acdev->qc = NULL;
+ ata_sff_interrupt(acdev->irq, acdev->host);
+
+ /* NOTE(review): qc is still dereferenced after ata_sff_interrupt() -- confirm it remains valid here */
+ spin_lock_irqsave(&acdev->host->lock, flags);
+ if (unlikely(qc->err_mask) && ata_is_dma(qc->tf.protocol))
+ ata_ehi_push_desc(&qc->ap->link.eh_info, "DMA Failed: Timeout");
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+}
+
+/*
+ * Wait (up to TIMEOUT) for the controller to signal cf_completion.
+ *
+ * Returns 0 on success, -ETIMEDOUT if nothing arrived in time, or -EAGAIN
+ * if the interrupt handler recorded an error in dma_status.
+ */
+static inline int wait4buf(struct arasan_cf_dev *acdev)
+{
+	unsigned long left;
+
+	left = wait_for_completion_timeout(&acdev->cf_completion, TIMEOUT);
+	if (left == 0) {
+		const char *dir = (acdev->qc->tf.flags & ATA_TFLAG_WRITE) ?
+			"write" : "read";
+
+		dev_err(acdev->host->dev, "%s TimeOut", dir);
+		return -ETIMEDOUT;
+	}
+
+	/* set by the PIO transfer error interrupt and by freeze */
+	return (acdev->dma_status & ATA_DMA_ERR) ? -EAGAIN : 0;
+}
+
+/*
+ * Copy @len bytes from @src to @dest with a single memcpy descriptor on
+ * acdev->dma_chan and wait (up to TIMEOUT) for dma_callback() to fire.
+ *
+ * Returns 0 on success, -EAGAIN if no descriptor could be prepared, the
+ * dma_submit_error() value if submission failed, or -ETIMEDOUT after
+ * terminating the channel when the transfer did not complete in time.
+ */
+static int
+dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len)
+{
+	struct dma_chan *chan = acdev->dma_chan;
+	struct device *dev = acdev->host->dev;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long prep_flags = DMA_PREP_INTERRUPT |
+		DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
+	dma_cookie_t cookie;
+	int err;
+
+	desc = chan->device->device_prep_dma_memcpy(chan, dest, src, len,
+						    prep_flags);
+	if (!desc) {
+		dev_err(dev, "device_prep_dma_memcpy failed\n");
+		return -EAGAIN;
+	}
+
+	desc->callback = dma_callback;
+	desc->callback_param = acdev;
+	cookie = desc->tx_submit(desc);
+
+	err = dma_submit_error(cookie);
+	if (err) {
+		dev_err(dev, "dma_submit_error\n");
+		return err;
+	}
+
+	chan->device->device_issue_pending(chan);
+
+	/* completed in time: done */
+	if (wait_for_completion_timeout(&acdev->dma_completion, TIMEOUT))
+		return 0;
+
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	dev_err(dev, "wait_for_completion_timeout\n");
+	return -ETIMEDOUT;
+}
+
+/*
+ * Transfer one scatterlist entry between memory and the controller FIFO.
+ *
+ * The direction comes from the current qc: for writes the sg buffer is the
+ * source and EXT_WRITE_PORT the destination; for reads EXT_READ_PORT is the
+ * source and the sg buffer the destination. XFER_START is cleared again on
+ * every exit path.
+ *
+ * Returns 0 on success or the error from wait4buf()/dma_xfer().
+ */
+static int sg_xfer(struct arasan_cf_dev *acdev, struct scatterlist *sg)
+{
+ dma_addr_t dest = 0, src = 0;
+ u32 xfer_cnt, sglen, dma_len, xfer_ctr;
+ u32 write = acdev->qc->tf.flags & ATA_TFLAG_WRITE;
+ unsigned long flags;
+ int ret = 0;
+
+ sglen = sg_dma_len(sg);
+ if (write) {
+ src = sg_dma_address(sg);
+ dest = acdev->pbase + EXT_WRITE_PORT;
+ } else {
+ dest = sg_dma_address(sg);
+ src = acdev->pbase + EXT_READ_PORT;
+ }
+
+ /*
+ * For each sg:
+ * At most MAX_XFER_COUNT bytes are programmed into XFER_CTR at a time;
+ * the transfer complete interrupt follows once they are moved. In
+ * between, a buffer available interrupt is generated after each
+ * FIFO_SIZE bytes, and at that point the FIFO is filled (or drained)
+ * again: max FIFO_SIZE data.
+ */
+ while (sglen) {
+ xfer_cnt = min(sglen, MAX_XFER_COUNT);
+ /* program the chunk length and start the controller side */
+ spin_lock_irqsave(&acdev->host->lock, flags);
+ xfer_ctr = readl(acdev->vbase + XFER_CTR) &
+ ~XFER_COUNT_MASK;
+ writel(xfer_ctr | xfer_cnt | XFER_START,
+ acdev->vbase + XFER_CTR);
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+ /* continue dma xfers until the current chunk is completed */
+ while (xfer_cnt) {
+ /* wait for read to complete */
+ if (!write) {
+ ret = wait4buf(acdev);
+ if (ret)
+ goto fail;
+ }
+
+ /* read/write FIFO in chunk of FIFO_SIZE */
+ dma_len = min(xfer_cnt, FIFO_SIZE);
+ ret = dma_xfer(acdev, src, dest, dma_len);
+ if (ret) {
+ dev_err(acdev->host->dev, "dma failed");
+ goto fail;
+ }
+
+ /* only the memory side advances; the FIFO port address is fixed */
+ if (write)
+ src += dma_len;
+ else
+ dest += dma_len;
+
+ sglen -= dma_len;
+ xfer_cnt -= dma_len;
+
+ /* wait for write to complete */
+ if (write) {
+ ret = wait4buf(acdev);
+ if (ret)
+ goto fail;
+ }
+ }
+ }
+
+ /* reached on success as well: ret is 0 in that case */
+fail:
+ spin_lock_irqsave(&acdev->host->lock, flags);
+ writel(readl(acdev->vbase + XFER_CTR) & ~XFER_START,
+ acdev->vbase + XFER_CTR);
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+ return ret;
+}
+
+/*
+ * Work handler that moves the data of the current DMA qc.
+ *
+ * This routine uses External DMA controller to read/write data to FIFO of CF
+ * controller. There are two xfer related interrupts supported by CF controller:
+ * - buf_avail: This interrupt is generated as soon as we have buffer of 512
+ *	bytes available for reading or empty buffer available for writing.
+ * - xfer_done: This interrupt is generated on transfer of "xfer_size" amount of
+ *	data to/from FIFO. xfer_size is programmed in XFER_CTR register.
+ *
+ * Max buffer size = FIFO_SIZE = 0x200 = 512 Bytes.
+ * Max xfer_size programmed at once = MAX_XFER_COUNT = 0x20000 = 128 KB.
+ *
+ * On success the command is completed through dma_complete(), possibly
+ * after polling via delayed_finish() while the device still reports
+ * BUSY/DRQ. On failure the qc is flagged with AC_ERR_HOST_BUS, the
+ * controller is reset and the command is completed with that error.
+ */
+static void data_xfer(struct work_struct *work)
+{
+	struct arasan_cf_dev *acdev = container_of(work, struct arasan_cf_dev,
+			work);
+	struct ata_queued_cmd *qc = acdev->qc;
+	struct scatterlist *sg;
+	unsigned long flags;
+	u32 temp;
+	int ret = 0;
+
+	/* request dma channels */
+	/* dma_request_channel may sleep, so calling from process context */
+	acdev->dma_chan = dma_request_channel(acdev->mask, filter,
+			acdev->dma_priv);
+	if (!acdev->dma_chan) {
+		dev_err(acdev->host->dev, "Unable to get dma_chan\n");
+		goto chan_request_fail;
+	}
+
+	for_each_sg(qc->sg, sg, qc->n_elem, temp) {
+		ret = sg_xfer(acdev, sg);
+		if (ret)
+			break;
+	}
+
+	dma_release_channel(acdev->dma_chan);
+	/*
+	 * The channel is gone; clear the pointer so arasan_cf_suspend(), which
+	 * tests acdev->dma_chan, never terminates/releases a stale channel.
+	 */
+	acdev->dma_chan = NULL;
+
+	/* data xferred successfully */
+	if (!ret) {
+		u32 status;
+
+		spin_lock_irqsave(&acdev->host->lock, flags);
+		status = ioread8(qc->ap->ioaddr.altstatus_addr);
+		spin_unlock_irqrestore(&acdev->host->lock, flags);
+		/* device not settled yet: poll again from delayed_finish() */
+		if (status & (ATA_BUSY | ATA_DRQ)) {
+			ata_sff_queue_delayed_work(&acdev->dwork, 1);
+			return;
+		}
+
+		goto sff_intr;
+	}
+
+	cf_dumpregs(acdev);
+
+chan_request_fail:
+	spin_lock_irqsave(&acdev->host->lock, flags);
+	/* error when transferring data to/from memory */
+	qc->err_mask |= AC_ERR_HOST_BUS;
+	qc->ap->hsm_task_state = HSM_ST_ERR;
+
+	cf_ctrl_reset(acdev);
+	/* release the same lock expression that was taken above */
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+sff_intr:
+	dma_complete(acdev);
+}
+
+/*
+ * Delayed work queued by data_xfer() (and by itself) while the device still
+ * reports BUSY or DRQ after the data phase. Samples the alternate status
+ * under the host lock; re-queues itself with a delay of 1 if the device is
+ * still busy, otherwise completes the command.
+ */
+static void delayed_finish(struct work_struct *work)
+{
+	struct arasan_cf_dev *acdev = container_of(work, struct arasan_cf_dev,
+			dwork.work);
+	struct ata_queued_cmd *qc = acdev->qc;
+	unsigned long flags;
+	u8 altstatus;
+
+	spin_lock_irqsave(&acdev->host->lock, flags);
+	altstatus = ioread8(qc->ap->ioaddr.altstatus_addr);
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+	if (!(altstatus & (ATA_BUSY | ATA_DRQ))) {
+		dma_complete(acdev);
+		return;
+	}
+
+	ata_sff_queue_delayed_work(&acdev->dwork, 1);
+}
+
+/*
+ * Interrupt handler; @dev is the ata_host registered in probe.
+ *
+ * Returns IRQ_NONE when the global status shows no CF interrupt (the line
+ * is shared with the XD controller), IRQ_HANDLED otherwise. Pending CF and
+ * global CF interrupts are acknowledged first, then handled in priority
+ * order: card detect, PIO transfer error, buffer available, transfer done.
+ * Only the first matching source is acted upon per invocation.
+ */
+static irqreturn_t arasan_cf_interrupt(int irq, void *dev)
+{
+ struct arasan_cf_dev *acdev = ((struct ata_host *)dev)->private_data;
+ unsigned long flags;
+ u32 irqsts;
+
+ irqsts = readl(acdev->vbase + GIRQ_STS);
+ if (!(irqsts & GIRQ_CF))
+ return IRQ_NONE;
+
+ spin_lock_irqsave(&acdev->host->lock, flags);
+ irqsts = readl(acdev->vbase + IRQ_STS);
+ writel(irqsts, acdev->vbase + IRQ_STS); /* clear irqs */
+ writel(GIRQ_CF, acdev->vbase + GIRQ_STS); /* clear girqs */
+
+ /* handle only relevant interrupts */
+ irqsts &= ~IGNORED_IRQS;
+
+ /* card inserted/removed: update state, notify EH, freeze the port */
+ if (irqsts & CARD_DETECT_IRQ) {
+ cf_card_detect(acdev, 1);
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ /* transfer error: record it, stop the transfer, wake wait4buf() */
+ if (irqsts & PIO_XFER_ERR_IRQ) {
+ acdev->dma_status = ATA_DMA_ERR;
+ writel(readl(acdev->vbase + XFER_CTR) & ~XFER_START,
+ acdev->vbase + XFER_CTR);
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+ complete(&acdev->cf_completion);
+ dev_err(acdev->host->dev, "pio xfer err irq\n");
+ return IRQ_HANDLED;
+ }
+
+ spin_unlock_irqrestore(&acdev->host->lock, flags);
+
+ /* FIFO ready for the next FIFO_SIZE chunk */
+ if (irqsts & BUF_AVAIL_IRQ) {
+ complete(&acdev->cf_completion);
+ return IRQ_HANDLED;
+ }
+
+ if (irqsts & XFER_DONE_IRQ) {
+ /* NOTE(review): acdev->qc is NULL once dma_complete() has run -- confirm this irq cannot arrive after that */
+ struct ata_queued_cmd *qc = acdev->qc;
+
+ /* Send Complete only for write */
+ if (qc->tf.flags & ATA_TFLAG_WRITE)
+ complete(&acdev->cf_completion);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * ->freeze hook: stop any transfer in flight, reset the controller, mark
+ * the DMA status as failed so a waiter in wait4buf() bails out, then run
+ * the generic SFF DMA pause and freeze.
+ */
+static void arasan_cf_freeze(struct ata_port *ap)
+{
+	struct arasan_cf_dev *acdev = ap->host->private_data;
+	void __iomem *xfer_ctr = acdev->vbase + XFER_CTR;
+	u32 ctr = readl(xfer_ctr);
+
+	/* stop transfer and reset controller */
+	writel(ctr & ~XFER_START, xfer_ctr);
+	cf_ctrl_reset(acdev);
+	acdev->dma_status = ATA_DMA_ERR;
+
+	ata_sff_dma_pause(ap);
+	ata_sff_freeze(ap);
+}
+
+/*
+ * ->error_handler hook. Transfers driven by the external DMA controller run
+ * from work items (see data_xfer() and delayed_finish()); make sure neither
+ * is pending or running before handing over to the generic SFF error
+ * handler.
+ */
+void arasan_cf_error_handler(struct ata_port *ap)
+{
+	struct arasan_cf_dev *acdev = ap->host->private_data;
+
+	cancel_work_sync(&acdev->work);
+	cancel_delayed_work_sync(&acdev->dwork);
+
+	ata_sff_error_handler(ap);
+}
+
+/*
+ * Kick off the DMA command in acdev->qc: program the transfer direction in
+ * XFER_CTR, issue the ATA command to the device, and queue data_xfer() to
+ * move the data.
+ */
+static void arasan_cf_dma_start(struct arasan_cf_dev *acdev)
+{
+	void __iomem *xfer_ctr = acdev->vbase + XFER_CTR;
+	u32 ctr = readl(xfer_ctr) & ~XFER_DIR_MASK;
+	struct ata_queued_cmd *qc = acdev->qc;
+	struct ata_port *ap = qc->ap;
+
+	if (qc->tf.flags & ATA_TFLAG_WRITE)
+		ctr |= XFER_WRITE;
+	else
+		ctr |= XFER_READ;
+	writel(ctr, xfer_ctr);
+
+	ap->ops->sff_exec_command(ap, &qc->tf);
+	ata_sff_queue_work(&acdev->work);
+}
+
+/*
+ * ->qc_issue hook. Non-DMA protocols go straight to ata_sff_qc_issue().
+ * For ATA_PROT_DMA the device is selected, the taskfile loaded and the
+ * transfer started through arasan_cf_dma_start().
+ *
+ * Returns 0 on success (or whatever ata_sff_qc_issue() returns), and
+ * AC_ERR_SYSTEM for any other DMA protocol.
+ */
+unsigned int arasan_cf_qc_issue(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct arasan_cf_dev *acdev = ap->host->private_data;
+
+	/* defer PIO handling to sff_qc_issue */
+	if (!ata_is_dma(qc->tf.protocol))
+		return ata_sff_qc_issue(qc);
+
+	/* select the device */
+	ata_wait_idle(ap);
+	ata_sff_dev_select(ap, qc->dev->devno);
+	ata_wait_idle(ap);
+
+	/* only plain ATA DMA is supported */
+	if (qc->tf.protocol != ATA_PROT_DMA) {
+		WARN_ON(1);
+		return AC_ERR_SYSTEM;
+	}
+
+	/* start the command */
+	WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+
+	ap->ops->sff_tf_load(ap, &qc->tf);
+	acdev->dma_status = 0;
+	acdev->qc = qc;
+	arasan_cf_dma_start(acdev);
+	ap->hsm_task_state = HSM_ST_LAST;
+
+	return 0;
+}
+
+/*
+ * ->set_piomode hook. Switches the controller to PIO operation: clears the
+ * DMA enables and DRQ block size in OP_MODE, programs the PIO timing field
+ * of TM_CFG, masks the buffer-available/transfer-done interrupts and
+ * unmasks the PIO transfer error interrupt. Modes above PIO6 are rejected
+ * with an error message and no register access.
+ */
+static void arasan_cf_set_piomode(struct ata_port *ap, struct ata_device *adev)
+{
+	struct arasan_cf_dev *acdev = ap->host->private_data;
+	void __iomem *base = acdev->vbase;
+	u8 pio = adev->pio_mode - XFER_PIO_0;
+	unsigned long flags;
+	u32 reg;
+
+	/* Arasan ctrl supports Mode0 -> Mode6 */
+	if (pio > 6) {
+		dev_err(ap->dev, "Unknown PIO mode\n");
+		return;
+	}
+
+	spin_lock_irqsave(&acdev->host->lock, flags);
+
+	reg = readl(base + OP_MODE);
+	reg &= ~(ULTRA_DMA_ENB | MULTI_WORD_DMA_ENB | DRQ_BLOCK_SIZE_MASK);
+	writel(reg, base + OP_MODE);
+
+	reg = readl(base + TM_CFG);
+	reg &= ~TRUEIDE_PIO_TIMING_MASK;
+	reg |= pio << TRUEIDE_PIO_TIMING_SHIFT;
+	writel(reg, base + TM_CFG);
+
+	cf_interrupt_enable(acdev, BUF_AVAIL_IRQ | XFER_DONE_IRQ, 0);
+	cf_interrupt_enable(acdev, PIO_XFER_ERR_IRQ, 1);
+
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+}
+
+/*
+ * ->set_dmamode hook. Enables Ultra DMA (UDMA0-6) or Multi-word DMA
+ * (MWDMA0-4) in OP_MODE, programs the matching timing field in TM_CFG,
+ * puts XFER_CTR into DMA transfer mode, masks the PIO transfer error
+ * interrupt and unmasks buffer-available/transfer-done. Any other mode is
+ * reported and leaves the registers unwritten.
+ */
+static void arasan_cf_set_dmamode(struct ata_port *ap, struct ata_device *adev)
+{
+	struct arasan_cf_dev *acdev = ap->host->private_data;
+	void __iomem *base = acdev->vbase;
+	u32 opmode, tmcfg, dma_mode = adev->dma_mode;
+	unsigned long flags;
+
+	spin_lock_irqsave(&acdev->host->lock, flags);
+
+	opmode = readl(base + OP_MODE);
+	opmode &= ~(MULTI_WORD_DMA_ENB | ULTRA_DMA_ENB);
+	tmcfg = readl(base + TM_CFG);
+
+	if ((dma_mode >= XFER_UDMA_0) && (dma_mode <= XFER_UDMA_6)) {
+		opmode |= ULTRA_DMA_ENB;
+		tmcfg &= ~ULTRA_DMA_TIMING_MASK;
+		tmcfg |= (dma_mode - XFER_UDMA_0) << ULTRA_DMA_TIMING_SHIFT;
+	} else if ((dma_mode >= XFER_MW_DMA_0) && (dma_mode <= XFER_MW_DMA_4)) {
+		opmode |= MULTI_WORD_DMA_ENB;
+		tmcfg &= ~TRUEIDE_MWORD_DMA_TIMING_MASK;
+		tmcfg |= (dma_mode - XFER_MW_DMA_0) <<
+			TRUEIDE_MWORD_DMA_TIMING_SHIFT;
+	} else {
+		dev_err(ap->dev, "Unknown DMA mode\n");
+		goto unlock;
+	}
+
+	writel(opmode, base + OP_MODE);
+	writel(tmcfg, base + TM_CFG);
+	writel(DMA_XFER_MODE, base + XFER_CTR);
+
+	cf_interrupt_enable(acdev, PIO_XFER_ERR_IRQ, 0);
+	cf_interrupt_enable(acdev, BUF_AVAIL_IRQ | XFER_DONE_IRQ, 1);
+
+unlock:
+	spin_unlock_irqrestore(&acdev->host->lock, flags);
+}
+
+/*
+ * Port operations: everything is inherited from ata_sff_port_ops except the
+ * hooks below. Not const: arasan_cf_probe() clears .set_piomode when the
+ * platform sets CF_BROKEN_PIO.
+ */
+static struct ata_port_operations arasan_cf_ops = {
+ .inherits = &ata_sff_port_ops,
+ .freeze = arasan_cf_freeze,
+ .error_handler = arasan_cf_error_handler,
+ .qc_issue = arasan_cf_qc_issue,
+ .set_piomode = arasan_cf_set_piomode,
+ .set_dmamode = arasan_cf_set_dmamode,
+};
+
+/*
+ * Platform probe: map the controller, grab its clock, allocate a one-port
+ * ATA host, apply platform quirks, initialise the hardware and register the
+ * host with libata.
+ *
+ * Platform data is required (cf_init() reads the interface clock from it).
+ * If no usable interrupt is provided the host is registered in polling
+ * mode and both DMA classes are disabled.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __devinit arasan_cf_probe(struct platform_device *pdev)
+{
+	struct arasan_cf_dev *acdev;
+	struct arasan_cf_pdata *pdata = dev_get_platdata(&pdev->dev);
+	struct ata_host *host;
+	struct ata_port *ap;
+	struct resource *res;
+	irq_handler_t irq_handler = NULL;
+	u32 quirk;
+	int ret = 0;
+
+	/* pdata is dereferenced below and in cf_init() */
+	if (!pdata) {
+		dev_warn(&pdev->dev, "No platform data\n");
+		return -EINVAL;
+	}
+	/* work on a copy so the shared platform data is never modified */
+	quirk = pdata->quirk;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
+				DRIVER_NAME)) {
+		dev_warn(&pdev->dev, "Failed to get memory region resource\n");
+		return -ENOENT;
+	}
+
+	acdev = devm_kzalloc(&pdev->dev, sizeof(*acdev), GFP_KERNEL);
+	if (!acdev) {
+		dev_warn(&pdev->dev, "kzalloc fail\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * platform_get_irq() returns a negative errno when no interrupt is
+	 * available; treat that (and 0) as "no irq": support only PIO and
+	 * register in polling mode.
+	 */
+	ret = platform_get_irq(pdev, 0);
+	if (ret > 0) {
+		acdev->irq = ret;
+		irq_handler = arasan_cf_interrupt;
+	} else {
+		acdev->irq = 0;
+		quirk |= CF_BROKEN_MWDMA | CF_BROKEN_UDMA;
+	}
+	ret = 0;
+
+	acdev->pbase = res->start;
+	acdev->vbase = devm_ioremap_nocache(&pdev->dev, res->start,
+			resource_size(res));
+	if (!acdev->vbase) {
+		dev_warn(&pdev->dev, "ioremap fail\n");
+		return -ENOMEM;
+	}
+
+#ifdef CONFIG_HAVE_CLK
+	acdev->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(acdev->clk)) {
+		dev_warn(&pdev->dev, "Clock not found\n");
+		return PTR_ERR(acdev->clk);
+	}
+#endif
+
+	/* allocate host */
+	host = ata_host_alloc(&pdev->dev, 1);
+	if (!host) {
+		ret = -ENOMEM;
+		dev_warn(&pdev->dev, "alloc host fail\n");
+		goto free_clk;
+	}
+
+	ap = host->ports[0];
+	host->private_data = acdev;
+	acdev->host = host;
+	ap->ops = &arasan_cf_ops;
+	ap->pio_mask = ATA_PIO6;
+	ap->mwdma_mask = ATA_MWDMA4;
+	ap->udma_mask = ATA_UDMA6;
+
+	init_completion(&acdev->cf_completion);
+	init_completion(&acdev->dma_completion);
+	INIT_WORK(&acdev->work, data_xfer);
+	INIT_DELAYED_WORK(&acdev->dwork, delayed_finish);
+	dma_cap_set(DMA_MEMCPY, acdev->mask);
+	acdev->dma_priv = pdata->dma_priv;
+
+	/* Handle platform specific quirks */
+	if (quirk) {
+		if (quirk & CF_BROKEN_PIO) {
+			ap->ops->set_piomode = NULL;
+			ap->pio_mask = 0;
+		}
+		if (quirk & CF_BROKEN_MWDMA)
+			ap->mwdma_mask = 0;
+		if (quirk & CF_BROKEN_UDMA)
+			ap->udma_mask = 0;
+	}
+	ap->flags |= ATA_FLAG_PIO_POLLING | ATA_FLAG_NO_ATAPI;
+
+	ap->ioaddr.cmd_addr = acdev->vbase + ATA_DATA_PORT;
+	ap->ioaddr.data_addr = acdev->vbase + ATA_DATA_PORT;
+	ap->ioaddr.error_addr = acdev->vbase + ATA_ERR_FTR;
+	ap->ioaddr.feature_addr = acdev->vbase + ATA_ERR_FTR;
+	ap->ioaddr.nsect_addr = acdev->vbase + ATA_SC;
+	ap->ioaddr.lbal_addr = acdev->vbase + ATA_SN;
+	ap->ioaddr.lbam_addr = acdev->vbase + ATA_CL;
+	ap->ioaddr.lbah_addr = acdev->vbase + ATA_CH;
+	ap->ioaddr.device_addr = acdev->vbase + ATA_SH;
+	ap->ioaddr.status_addr = acdev->vbase + ATA_STS_CMD;
+	ap->ioaddr.command_addr = acdev->vbase + ATA_STS_CMD;
+	ap->ioaddr.altstatus_addr = acdev->vbase + ATA_ASTS_DCTR;
+	ap->ioaddr.ctl_addr = acdev->vbase + ATA_ASTS_DCTR;
+
+	ata_port_desc(ap, "phy_addr %llx virt_addr %p",
+		      (unsigned long long) res->start, acdev->vbase);
+
+	ret = cf_init(acdev);
+	if (ret)
+		goto free_clk;
+
+	/* record the initial card state without raising a hotplug event */
+	cf_card_detect(acdev, 0);
+
+	ret = ata_host_activate(host, acdev->irq, irq_handler, 0,
+				&arasan_cf_sht);
+	if (!ret)
+		return 0;
+
+	/* undo cf_init(): mask interrupts, disable controller and clock */
+	cf_exit(acdev);
+free_clk:
+#ifdef CONFIG_HAVE_CLK
+	clk_put(acdev->clk);
+#endif
+	return ret;
+}
+
+/*
+ * Platform remove: detach the ATA host, shut the controller down and drop
+ * the clock reference. Always returns 0.
+ */
+static int __devexit arasan_cf_remove(struct platform_device *pdev)
+{
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	/*
+	 * probe stores the driver state in host->private_data; the port's
+	 * private_data is never set by this driver.
+	 */
+	struct arasan_cf_dev *acdev = host->private_data;
+
+	ata_host_detach(host);
+	cf_exit(acdev);
+#ifdef CONFIG_HAVE_CLK
+	clk_put(acdev->clk);
+#endif
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * PM suspend: terminate and release a DMA channel that is still held, shut
+ * the controller down, then suspend the ATA host.
+ *
+ * Returns the result of ata_host_suspend().
+ */
+static int arasan_cf_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	/* driver state lives in host->private_data (set in probe) */
+	struct arasan_cf_dev *acdev = host->private_data;
+
+	if (acdev->dma_chan) {
+		acdev->dma_chan->device->device_control(acdev->dma_chan,
+				DMA_TERMINATE_ALL, 0);
+		dma_release_channel(acdev->dma_chan);
+		/* never release the same channel twice */
+		acdev->dma_chan = NULL;
+	}
+	cf_exit(acdev);
+	return ata_host_suspend(host, PMSG_SUSPEND);
+}
+
+/*
+ * PM resume: re-initialise the controller and resume the ATA host.
+ *
+ * Returns 0 on success, or the cf_init() error (clock enable failure), in
+ * which case the host is not resumed.
+ */
+static int arasan_cf_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+	/* driver state lives in host->private_data (set in probe) */
+	struct arasan_cf_dev *acdev = host->private_data;
+	int ret;
+
+	ret = cf_init(acdev);
+	if (ret)
+		return ret;
+
+	ata_host_resume(host);
+
+	return 0;
+}
+
+/* Power-management callbacks; only suspend and resume are provided. */
+static const struct dev_pm_ops arasan_cf_pm_ops = {
+ .suspend = arasan_cf_suspend,
+ .resume = arasan_cf_resume,
+};
+#endif
+
+/*
+ * Platform driver glue. Binds by name to DRIVER_NAME ("arasan_cf"); the PM
+ * callbacks are hooked up only when CONFIG_PM is enabled.
+ */
+static struct platform_driver arasan_cf_driver = {
+ .probe = arasan_cf_probe,
+ .remove = __devexit_p(arasan_cf_remove),
+ .driver = {
+ .name = DRIVER_NAME,
+ .owner = THIS_MODULE,
+#ifdef CONFIG_PM
+ .pm = &arasan_cf_pm_ops,
+#endif
+ },
+};
+
+/* Module entry point: register the platform driver. */
+static int __init arasan_cf_init(void)
+{
+ return platform_driver_register(&arasan_cf_driver);
+}
+module_init(arasan_cf_init);
+
+/* Module exit point: unregister the platform driver. */
+static void __exit arasan_cf_exit(void)
+{
+ platform_driver_unregister(&arasan_cf_driver);
+}
+module_exit(arasan_cf_exit);
+
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_DESCRIPTION("Arasan ATA Compact Flash driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/ata/pata_at32.c b/drivers/ata/pata_at32.c
index 66ce6a526f27..36f189c7ee8c 100644
--- a/drivers/ata/pata_at32.c
+++ b/drivers/ata/pata_at32.c
@@ -194,7 +194,7 @@ static int __init pata_at32_init_one(struct device *dev,
/* Setup ATA bindings */
ap->ops = &at32_port_ops;
ap->pio_mask = PIO_MASK;
- ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_SLAVE_POSS;
+ ap->flags |= ATA_FLAG_SLAVE_POSS;
/*
* Since all 8-bit taskfile transfers has to go on the lower
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 7aed5c792597..e0b58b8dfe6f 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1454,9 +1454,7 @@ static struct ata_port_operations bfin_pata_ops = {
static struct ata_port_info bfin_port_info[] = {
{
- .flags = ATA_FLAG_SLAVE_POSS
- | ATA_FLAG_MMIO
- | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
.mwdma_mask = 0,
.udma_mask = 0,
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index 538ec38ba995..6c77d68dbd05 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -14,6 +14,7 @@
* Look into engine reset on timeout errors. Should not be required.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/module.h>
@@ -25,7 +26,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt366"
-#define DRV_VERSION "0.6.10"
+#define DRV_VERSION "0.6.11"
struct hpt_clock {
u8 xfer_mode;
@@ -160,8 +161,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
while (list[i] != NULL) {
if (!strcmp(list[i], model_num)) {
- pr_warning(DRV_NAME ": %s is not supported for %s.\n",
- modestr, list[i]);
+ pr_warn("%s is not supported for %s\n",
+ modestr, list[i]);
return 1;
}
i++;
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 4c5b5183225e..9620636aa405 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -14,6 +14,8 @@
* Look into engine reset on timeout errors. Should not be required.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -24,7 +26,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt37x"
-#define DRV_VERSION "0.6.22"
+#define DRV_VERSION "0.6.23"
struct hpt_clock {
u8 xfer_speed;
@@ -229,8 +231,8 @@ static int hpt_dma_blacklisted(const struct ata_device *dev, char *modestr,
while (list[i] != NULL) {
if (!strcmp(list[i], model_num)) {
- pr_warning(DRV_NAME ": %s is not supported for %s.\n",
- modestr, list[i]);
+ pr_warn("%s is not supported for %s\n",
+ modestr, list[i]);
return 1;
}
i++;
@@ -863,8 +865,8 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
chip_table = &hpt372;
break;
default:
- pr_err(DRV_NAME ": Unknown HPT366 subtype, "
- "please report (%d).\n", rev);
+ pr_err("Unknown HPT366 subtype, please report (%d)\n",
+ rev);
return -ENODEV;
}
break;
@@ -904,8 +906,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
*ppi = &info_hpt374_fn1;
break;
default:
- pr_err(DRV_NAME ": PCI table is bogus, please report (%d).\n",
- dev->device);
+ pr_err("PCI table is bogus, please report (%d)\n", dev->device);
return -ENODEV;
}
/* Ok so this is a chip we support */
@@ -953,7 +954,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
u8 sr;
u32 total = 0;
- pr_warning(DRV_NAME ": BIOS has not set timing clocks.\n");
+ pr_warn("BIOS has not set timing clocks\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
@@ -1009,7 +1010,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
(f_high << 16) | f_low | 0x100);
}
if (adjust == 8) {
- pr_err(DRV_NAME ": DPLL did not stabilize!\n");
+ pr_err("DPLL did not stabilize!\n");
return -ENODEV;
}
if (dpll == 3)
@@ -1017,7 +1018,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
else
private_data = (void *)hpt37x_timings_50;
- pr_info(DRV_NAME ": bus clock %dMHz, using %dMHz DPLL.\n",
+ pr_info("bus clock %dMHz, using %dMHz DPLL\n",
MHz[clock_slot], MHz[dpll]);
} else {
private_data = (void *)chip_table->clocks[clock_slot];
@@ -1032,7 +1033,7 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
if (clock_slot < 2 && ppi[0] == &info_hpt370a)
ppi[0] = &info_hpt370a_33;
- pr_info(DRV_NAME ": %s using %dMHz bus clock.\n",
+ pr_info("%s using %dMHz bus clock\n",
chip_table->name, MHz[clock_slot]);
}
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index eca68caf5f46..765f136d8cd3 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -15,6 +15,8 @@
* Work out best PLL policy
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>
@@ -25,7 +27,7 @@
#include <linux/libata.h>
#define DRV_NAME "pata_hpt3x2n"
-#define DRV_VERSION "0.3.14"
+#define DRV_VERSION "0.3.15"
enum {
HPT_PCI_FAST = (1 << 31),
@@ -418,7 +420,7 @@ static int hpt3x2n_pci_clock(struct pci_dev *pdev)
u16 sr;
u32 total = 0;
- pr_warning(DRV_NAME ": BIOS clock data not set.\n");
+ pr_warn("BIOS clock data not set\n");
/* This is the process the HPT371 BIOS is reported to use */
for (i = 0; i < 128; i++) {
@@ -528,8 +530,7 @@ hpt372n:
ppi[0] = &info_hpt372n;
break;
default:
- pr_err(DRV_NAME ": PCI table is bogus, please report (%d).\n",
- dev->device);
+ pr_err("PCI table is bogus, please report (%d)\n", dev->device);
return -ENODEV;
}
@@ -578,11 +579,11 @@ hpt372n:
pci_write_config_dword(dev, 0x5C, (f_high << 16) | f_low);
}
if (adjust == 8) {
- pr_err(DRV_NAME ": DPLL did not stabilize!\n");
+ pr_err("DPLL did not stabilize!\n");
return -ENODEV;
}
- pr_info(DRV_NAME ": bus clock %dMHz, using 66MHz DPLL.\n", pci_mhz);
+ pr_info("bus clock %dMHz, using 66MHz DPLL\n", pci_mhz);
/*
* Set our private data up. We only need a few flags
diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c
index b63d5e2d4628..24d7df81546b 100644
--- a/drivers/ata/pata_hpt3x3.c
+++ b/drivers/ata/pata_hpt3x3.c
@@ -151,7 +151,7 @@ static struct ata_port_operations hpt3x3_port_ops = {
.check_atapi_dma= hpt3x3_atapi_dma,
.freeze = hpt3x3_freeze,
#endif
-
+
};
/**
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index aa0e0c51cc08..2d15f2548a10 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -616,7 +616,7 @@ static void it821x_display_disk(int n, u8 *buf)
if (buf[52] > 4) /* No Disk */
return;
- ata_id_c_string((u16 *)buf, id, 0, 41);
+ ata_id_c_string((u16 *)buf, id, 0, 41);
if (buf[51]) {
mode = ffs(buf[51]);
@@ -910,7 +910,7 @@ static int it821x_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
rc = pcim_enable_device(pdev);
if (rc)
return rc;
-
+
if (pdev->vendor == PCI_VENDOR_ID_RDC) {
/* Deal with Vortex86SX */
if (pdev->revision == 0x11)
diff --git a/drivers/ata/pata_ixp4xx_cf.c b/drivers/ata/pata_ixp4xx_cf.c
index ba54b089f98c..5253b271b3fe 100644
--- a/drivers/ata/pata_ixp4xx_cf.c
+++ b/drivers/ata/pata_ixp4xx_cf.c
@@ -177,7 +177,7 @@ static __devinit int ixp4xx_pata_probe(struct platform_device *pdev)
ap->ops = &ixp4xx_port_ops;
ap->pio_mask = ATA_PIO4;
- ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY | ATA_FLAG_NO_ATAPI;
+ ap->flags |= ATA_FLAG_NO_ATAPI;
ixp4xx_setup_port(ap, data, cs0->start, cs1->start);
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 75b49d01780b..46f589edccdb 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -1053,8 +1053,7 @@ static int __devinit pata_macio_common_init(struct pata_macio_priv *priv,
/* Allocate libata host for 1 port */
memset(&pinfo, 0, sizeof(struct ata_port_info));
pmac_macio_calc_timing_masks(priv, &pinfo);
- pinfo.flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO |
- ATA_FLAG_NO_LEGACY;
+ pinfo.flags = ATA_FLAG_SLAVE_POSS;
pinfo.port_ops = &pata_macio_ops;
pinfo.private_data = priv;
diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c
index dd38083dcbeb..75a6a0c0094f 100644
--- a/drivers/ata/pata_marvell.c
+++ b/drivers/ata/pata_marvell.c
@@ -38,7 +38,7 @@ static int marvell_pata_active(struct pci_dev *pdev)
/* We don't yet know how to do this for other devices */
if (pdev->device != 0x6145)
- return 1;
+ return 1;
barp = pci_iomap(pdev, 5, 0x10);
if (barp == NULL)
diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index cc50bd09aa26..e277a142138c 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c
@@ -165,7 +165,7 @@ static int ninja32_reinit_one(struct pci_dev *pdev)
return rc;
ninja32_program(host->iomap[0]);
ata_host_resume(host);
- return 0;
+ return 0;
}
#endif
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index fa1b95a9a7ff..220ddc90608f 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -848,8 +848,7 @@ static int __devinit octeon_cf_probe(struct platform_device *pdev)
cf_port->ap = ap;
ap->ops = &octeon_cf_ops;
ap->pio_mask = ATA_PIO6;
- ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY
- | ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
+ ap->flags |= ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
base = cs0 + ocd->base_region_bias;
if (!ocd->is16bit) {
diff --git a/drivers/ata/pata_palmld.c b/drivers/ata/pata_palmld.c
index 11fb4ccc74b4..a2a73d953840 100644
--- a/drivers/ata/pata_palmld.c
+++ b/drivers/ata/pata_palmld.c
@@ -85,7 +85,7 @@ static __devinit int palmld_pata_probe(struct platform_device *pdev)
ap = host->ports[0];
ap->ops = &palmld_port_ops;
ap->pio_mask = ATA_PIO4;
- ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY | ATA_FLAG_PIO_POLLING;
+ ap->flags |= ATA_FLAG_PIO_POLLING;
/* memory mapping voodoo */
ap->ioaddr.cmd_addr = mem + 0x10;
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index 806292160b3f..29af660d968b 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -124,7 +124,7 @@ static unsigned int ata_data_xfer_8bit(struct ata_device *dev,
* reset will recover the device.
*
*/
-
+
static void pcmcia_8bit_drain_fifo(struct ata_queued_cmd *qc)
{
int count;
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index b18351122525..9765ace16921 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -150,8 +150,7 @@ static struct ata_port_operations pdc2027x_pata133_ops = {
static struct ata_port_info pdc2027x_port_info[] = {
/* PDC_UDMA_100 */
{
- .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS |
- ATA_FLAG_MMIO,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA5,
@@ -159,8 +158,7 @@ static struct ata_port_info pdc2027x_port_info[] = {
},
/* PDC_UDMA_133 */
{
- .flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_SLAVE_POSS |
- ATA_FLAG_MMIO,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
index 1898c6ed4b4e..b4ede40f8ae1 100644
--- a/drivers/ata/pata_pxa.c
+++ b/drivers/ata/pata_pxa.c
@@ -292,7 +292,6 @@ static int __devinit pxa_ata_probe(struct platform_device *pdev)
ap->ops = &pxa_ata_port_ops;
ap->pio_mask = ATA_PIO4;
ap->mwdma_mask = ATA_MWDMA2;
- ap->flags = ATA_FLAG_MMIO;
ap->ioaddr.cmd_addr = devm_ioremap(&pdev->dev, cmd_res->start,
resource_size(cmd_res));
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 0ffd631000b7..baeaf938d55b 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -91,7 +91,6 @@ static void rb532_pata_setup_ports(struct ata_host *ah)
ap->ops = &rb532_pata_port_ops;
ap->pio_mask = ATA_PIO4;
- ap->flags = ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO;
ap->ioaddr.cmd_addr = info->iobase + RB500_CF_REG_BASE;
ap->ioaddr.ctl_addr = info->iobase + RB500_CF_REG_CTRL;
diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c
index 8a51d673e5b2..c446ae6055a3 100644
--- a/drivers/ata/pata_samsung_cf.c
+++ b/drivers/ata/pata_samsung_cf.c
@@ -531,7 +531,6 @@ static int __init pata_s3c_probe(struct platform_device *pdev)
}
ap = host->ports[0];
- ap->flags |= ATA_FLAG_MMIO;
ap->pio_mask = ATA_PIO4;
if (cpu_type == TYPE_S3C64XX) {
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 093715c3273a..88ea9b677b47 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -959,7 +959,7 @@ static struct ata_port_operations scc_pata_ops = {
static struct ata_port_info scc_port_info[] = {
{
- .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
/* No MWDMA */
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c
index 60cea13cccce..c04abc393fc5 100644
--- a/drivers/ata/pata_sis.c
+++ b/drivers/ata/pata_sis.c
@@ -593,7 +593,7 @@ static const struct ata_port_info sis_info133 = {
.port_ops = &sis_133_ops,
};
const struct ata_port_info sis_info133_for_sata = {
- .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_SRST,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
/* No MWDMA */
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index adbe0426c8f0..1111712b3d7d 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -166,9 +166,7 @@ static struct ata_port_operations adma_ata_ops = {
static struct ata_port_info adma_port_info[] = {
/* board_1841_idx */
{
- .flags = ATA_FLAG_SLAVE_POSS |
- ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO |
- ATA_FLAG_PIO_POLLING,
+ .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_POLLING,
.pio_mask = ATA_PIO4_ONLY,
.udma_mask = ATA_UDMA4,
.port_ops = &adma_ata_ops,
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index 6cf57c5c2b5f..712ab5a4922e 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -40,8 +40,11 @@
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
+/* These two are defined in "libata.h" */
+#undef DRV_NAME
+#undef DRV_VERSION
#define DRV_NAME "sata-dwc"
-#define DRV_VERSION "1.0"
+#define DRV_VERSION "1.3"
/* SATA DMA driver Globals */
#define DMA_NUM_CHANS 1
@@ -333,11 +336,47 @@ static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
void __iomem *addr, int dir);
static void dma_dwc_xfer_start(int dma_ch);
+static const char *get_prot_descript(u8 protocol)
+{
+ switch ((enum ata_tf_protocols)protocol) {
+ case ATA_PROT_NODATA:
+ return "ATA no data";
+ case ATA_PROT_PIO:
+ return "ATA PIO";
+ case ATA_PROT_DMA:
+ return "ATA DMA";
+ case ATA_PROT_NCQ:
+ return "ATA NCQ";
+ case ATAPI_PROT_NODATA:
+ return "ATAPI no data";
+ case ATAPI_PROT_PIO:
+ return "ATAPI PIO";
+ case ATAPI_PROT_DMA:
+ return "ATAPI DMA";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *get_dma_dir_descript(int dma_dir)
+{
+ switch ((enum dma_data_direction)dma_dir) {
+ case DMA_BIDIRECTIONAL:
+ return "bidirectional";
+ case DMA_TO_DEVICE:
+ return "to device";
+ case DMA_FROM_DEVICE:
+ return "from device";
+ default:
+ return "none";
+ }
+}
+
static void sata_dwc_tf_dump(struct ata_taskfile *tf)
{
dev_vdbg(host_pvt.dwc_dev, "taskfile cmd: 0x%02x protocol: %s flags:"
- "0x%lx device: %x\n", tf->command, ata_get_cmd_descript\
- (tf->protocol), tf->flags, tf->device);
+ "0x%lx device: %x\n", tf->command,
+ get_prot_descript(tf->protocol), tf->flags, tf->device);
dev_vdbg(host_pvt.dwc_dev, "feature: 0x%02x nsect: 0x%x lbal: 0x%x "
"lbam: 0x%x lbah: 0x%x\n", tf->feature, tf->nsect, tf->lbal,
tf->lbam, tf->lbah);
@@ -715,7 +754,7 @@ static int dma_dwc_xfer_setup(struct scatterlist *sg, int num_elems,
/* Program the CTL register with src enable / dst enable */
out_le32(&(host_pvt.sata_dma_regs->chan_regs[dma_ch].ctl.low),
DMA_CTL_LLP_SRCEN | DMA_CTL_LLP_DSTEN);
- return 0;
+ return dma_ch;
}
/*
@@ -967,7 +1006,7 @@ static irqreturn_t sata_dwc_isr(int irq, void *dev_instance)
}
dev_dbg(ap->dev, "%s non-NCQ cmd interrupt, protocol: %s\n",
- __func__, ata_get_cmd_descript(qc->tf.protocol));
+ __func__, get_prot_descript(qc->tf.protocol));
DRVSTILLBUSY:
if (ata_is_dma(qc->tf.protocol)) {
/*
@@ -1057,7 +1096,7 @@ DRVSTILLBUSY:
/* Process completed command */
dev_dbg(ap->dev, "%s NCQ command, protocol: %s\n", __func__,
- ata_get_cmd_descript(qc->tf.protocol));
+ get_prot_descript(qc->tf.protocol));
if (ata_is_dma(qc->tf.protocol)) {
host_pvt.dma_interrupt_count++;
if (hsdevp->dma_pending[tag] == \
@@ -1142,8 +1181,8 @@ static void sata_dwc_dma_xfer_complete(struct ata_port *ap, u32 check_status)
if (tag > 0) {
dev_info(ap->dev, "%s tag=%u cmd=0x%02x dma dir=%s proto=%s "
"dmacr=0x%08x\n", __func__, qc->tag, qc->tf.command,
- ata_get_cmd_descript(qc->dma_dir),
- ata_get_cmd_descript(qc->tf.protocol),
+ get_dma_dir_descript(qc->dma_dir),
+ get_prot_descript(qc->tf.protocol),
in_le32(&(hsdev->sata_dwc_regs->dmacr)));
}
#endif
@@ -1354,7 +1393,7 @@ static void sata_dwc_exec_command_by_tag(struct ata_port *ap,
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
dev_dbg(ap->dev, "%s cmd(0x%02x): %s tag=%d\n", __func__, tf->command,
- ata_get_cmd_descript(tf), tag);
+ ata_get_cmd_descript(tf->command), tag);
spin_lock_irqsave(&ap->host->lock, flags);
hsdevp->cmd_issued[tag] = cmd_issued;
@@ -1413,7 +1452,7 @@ static void sata_dwc_bmdma_start_by_tag(struct ata_queued_cmd *qc, u8 tag)
dev_dbg(ap->dev, "%s qc=%p tag: %x cmd: 0x%02x dma_dir: %s "
"start_dma? %x\n", __func__, qc, tag, qc->tf.command,
- ata_get_cmd_descript(qc->dma_dir), start_dma);
+ get_dma_dir_descript(qc->dma_dir), start_dma);
sata_dwc_tf_dump(&(qc->tf));
if (start_dma) {
@@ -1462,10 +1501,9 @@ static void sata_dwc_qc_prep_by_tag(struct ata_queued_cmd *qc, u8 tag)
int dma_chan;
struct sata_dwc_device *hsdev = HSDEV_FROM_AP(ap);
struct sata_dwc_device_port *hsdevp = HSDEVP_FROM_AP(ap);
- int err;
dev_dbg(ap->dev, "%s: port=%d dma dir=%s n_elem=%d\n",
- __func__, ap->port_no, ata_get_cmd_descript(qc->dma_dir),
+ __func__, ap->port_no, get_dma_dir_descript(qc->dma_dir),
qc->n_elem);
dma_chan = dma_dwc_xfer_setup(sg, qc->n_elem, hsdevp->llit[tag],
@@ -1474,7 +1512,7 @@ static void sata_dwc_qc_prep_by_tag(struct ata_queued_cmd *qc, u8 tag)
dmadr), qc->dma_dir);
if (dma_chan < 0) {
dev_err(ap->dev, "%s: dma_dwc_xfer_setup returns err %d\n",
- __func__, err);
+ __func__, dma_chan);
return;
}
hsdevp->dma_chan[tag] = dma_chan;
@@ -1491,8 +1529,8 @@ static unsigned int sata_dwc_qc_issue(struct ata_queued_cmd *qc)
dev_info(ap->dev, "%s ap id=%d cmd(0x%02x)=%s qc tag=%d "
"prot=%s ap active_tag=0x%08x ap sactive=0x%08x\n",
__func__, ap->print_id, qc->tf.command,
- ata_get_cmd_descript(&qc->tf),
- qc->tag, ata_get_cmd_descript(qc->tf.protocol),
+ ata_get_cmd_descript(qc->tf.command),
+ qc->tag, get_prot_descript(qc->tf.protocol),
ap->link.active_tag, ap->link.sactive);
#endif
@@ -1533,7 +1571,7 @@ static void sata_dwc_qc_prep(struct ata_queued_cmd *qc)
#ifdef DEBUG_NCQ
if (qc->tag > 0)
dev_info(qc->ap->dev, "%s: qc->tag=%d ap->active_tag=0x%08x\n",
- __func__, tag, qc->ap->link.active_tag);
+ __func__, qc->tag, qc->ap->link.active_tag);
return ;
#endif
@@ -1580,9 +1618,8 @@ static struct ata_port_operations sata_dwc_ops = {
static const struct ata_port_info sata_dwc_port_info[] = {
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_NCQ,
- .pio_mask = 0x1f, /* pio 0-4 */
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
+ .pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &sata_dwc_ops,
},
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index b0214d00d50b..7f9eab34a386 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -33,8 +33,7 @@ enum {
SATA_FSL_MAX_PRD_USABLE = SATA_FSL_MAX_PRD - 1,
SATA_FSL_MAX_PRD_DIRECT = 16, /* Direct PRDT entries */
- SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
+ SATA_FSL_HOST_FLAGS = (ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),
SATA_FSL_MAX_CMDS = SATA_FSL_QUEUE_DEPTH,
@@ -186,6 +185,11 @@ enum {
COMMANDSTAT = 0x20,
};
+/* TRANSCFG (transport-layer) configuration control */
+enum {
+ TRANSCFG_RX_WATER_MARK = (1 << 4),
+};
+
/* PHY (link-layer) configuration control */
enum {
PHY_BIST_ENABLE = 0x01,
@@ -1040,12 +1044,15 @@ static void sata_fsl_error_intr(struct ata_port *ap)
/* find out the offending link and qc */
if (ap->nr_pmp_links) {
+ unsigned int dev_num;
+
dereg = ioread32(hcr_base + DE);
iowrite32(dereg, hcr_base + DE);
iowrite32(cereg, hcr_base + CE);
- if (dereg < ap->nr_pmp_links) {
- link = &ap->pmp_link[dereg];
+ dev_num = ffs(dereg) - 1;
+ if (dev_num < ap->nr_pmp_links && dereg != 0) {
+ link = &ap->pmp_link[dev_num];
ehi = &link->eh_info;
qc = ata_qc_from_tag(ap, link->active_tag);
/*
@@ -1303,6 +1310,7 @@ static int sata_fsl_probe(struct platform_device *ofdev,
struct sata_fsl_host_priv *host_priv = NULL;
int irq;
struct ata_host *host;
+ u32 temp;
struct ata_port_info pi = sata_fsl_port_info[0];
const struct ata_port_info *ppi[] = { &pi, NULL };
@@ -1317,6 +1325,12 @@ static int sata_fsl_probe(struct platform_device *ofdev,
ssr_base = hcr_base + 0x100;
csr_base = hcr_base + 0x140;
+ if (!of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc8315-sata")) {
+ temp = ioread32(csr_base + TRANSCFG);
+ temp = temp & 0xffffffe0;
+ iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG);
+ }
+
DPRINTK("@reset i/o = 0x%x\n", ioread32(csr_base + TRANSCFG));
DPRINTK("sizeof(cmd_desc) = %d\n", sizeof(struct command_desc));
DPRINTK("sizeof(#define cmd_desc) = %d\n", SATA_FSL_CMD_DESC_SIZE);
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bf74a36d3cc3..cd40651e9b72 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -160,8 +160,7 @@ enum {
/* Host Flags */
MV_FLAG_DUAL_HC = (1 << 30), /* two SATA Host Controllers */
- MV_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING,
+ MV_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_PIO_POLLING,
MV_GEN_I_FLAGS = MV_COMMON_FLAGS | ATA_FLAG_NO_ATAPI,
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 7254e255fd78..42344e3c686d 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -539,7 +539,7 @@ struct nv_pi_priv {
static const struct ata_port_info nv_port_info[] = {
/* generic */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
@@ -548,7 +548,7 @@ static const struct ata_port_info nv_port_info[] = {
},
/* nforce2/3 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
@@ -557,7 +557,7 @@ static const struct ata_port_info nv_port_info[] = {
},
/* ck804 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
@@ -566,8 +566,7 @@ static const struct ata_port_info nv_port_info[] = {
},
/* ADMA */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_NCQ,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
@@ -576,7 +575,7 @@ static const struct ata_port_info nv_port_info[] = {
},
/* MCP5x */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
@@ -585,8 +584,7 @@ static const struct ata_port_info nv_port_info[] = {
},
/* SWNCQ */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_NCQ,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NCQ,
.pio_mask = NV_PIO_MASK,
.mwdma_mask = NV_MWDMA_MASK,
.udma_mask = NV_UDMA_MASK,
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index f03ad48273ff..a004b1e0ea6d 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -134,9 +134,7 @@ enum {
PDC_IRQ_DISABLE = (1 << 10),
PDC_RESET = (1 << 11), /* HDMA reset */
- PDC_COMMON_FLAGS = ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO |
- ATA_FLAG_PIO_POLLING,
+ PDC_COMMON_FLAGS = ATA_FLAG_PIO_POLLING,
/* ap->flags bits */
PDC_FLAG_GEN_II = (1 << 24),
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index daeebf19a6a9..c5603265fa58 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -155,8 +155,7 @@ static struct ata_port_operations qs_ata_ops = {
static const struct ata_port_info qs_port_info[] = {
/* board_2068_idx */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_POLLING,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_POLLING,
.pio_mask = ATA_PIO4_ONLY,
.udma_mask = ATA_UDMA6,
.port_ops = &qs_ata_ops,
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 3a4f84219719..b42edaaf3a53 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -61,8 +61,7 @@ enum {
SIL_FLAG_RERR_ON_DMA_ACT = (1 << 29),
SIL_FLAG_MOD15WRITE = (1 << 30),
- SIL_DFL_PORT_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO,
+ SIL_DFL_PORT_FLAGS = ATA_FLAG_SATA,
/*
* Controller IDs
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index af41c6fd1254..06c564e55051 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -244,8 +244,7 @@ enum {
BID_SIL3131 = 2,
/* host flags */
- SIL24_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA |
+ SIL24_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA |
ATA_FLAG_AN | ATA_FLAG_PMP,
SIL24_FLAG_PCIX_IRQ_WOC = (1 << 24), /* IRQ loss errata on PCI-X */
diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c
index 2bfe3ae03976..cdcc13e9cf51 100644
--- a/drivers/ata/sata_sis.c
+++ b/drivers/ata/sata_sis.c
@@ -96,7 +96,7 @@ static struct ata_port_operations sis_ops = {
};
static const struct ata_port_info sis_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index 7d9db4aaf07e..35eabcf34568 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -359,8 +359,7 @@ static struct ata_port_operations k2_sata_ops = {
static const struct ata_port_info k2_port_info[] = {
/* chip_svw4 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | K2_FLAG_NO_ATAPI_DMA,
+ .flags = ATA_FLAG_SATA | K2_FLAG_NO_ATAPI_DMA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
@@ -368,8 +367,7 @@ static const struct ata_port_info k2_port_info[] = {
},
/* chip_svw8 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | K2_FLAG_NO_ATAPI_DMA |
+ .flags = ATA_FLAG_SATA | K2_FLAG_NO_ATAPI_DMA |
K2_FLAG_SATA_8_PORTS,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
@@ -378,8 +376,7 @@ static const struct ata_port_info k2_port_info[] = {
},
/* chip_svw42 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO | K2_FLAG_BAR_POS_3,
+ .flags = ATA_FLAG_SATA | K2_FLAG_BAR_POS_3,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
@@ -387,8 +384,7 @@ static const struct ata_port_info k2_port_info[] = {
},
/* chip_svw43 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO,
+ .flags = ATA_FLAG_SATA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index bedd5188e5b0..8fd3b7252bda 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -273,9 +273,8 @@ static struct ata_port_operations pdc_20621_ops = {
static const struct ata_port_info pdc_port_info[] = {
/* board_20621 */
{
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_SRST | ATA_FLAG_MMIO |
- ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_NO_ATAPI |
+ ATA_FLAG_PIO_POLLING,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index b8578c32d344..235be717a713 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -88,8 +88,7 @@ static struct ata_port_operations uli_ops = {
};
static const struct ata_port_info uli_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_IGN_SIMPLEX,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_IGN_SIMPLEX,
.pio_mask = ATA_PIO4,
.udma_mask = ATA_UDMA6,
.port_ops = &uli_ops,
diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 8b677bbf2d37..21242c5709a0 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -148,7 +148,7 @@ static struct ata_port_operations vt8251_ops = {
};
static const struct ata_port_info vt6420_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
@@ -156,7 +156,7 @@ static const struct ata_port_info vt6420_port_info = {
};
static struct ata_port_info vt6421_sport_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
@@ -164,7 +164,7 @@ static struct ata_port_info vt6421_sport_info = {
};
static struct ata_port_info vt6421_pport_info = {
- .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
/* No MWDMA */
.udma_mask = ATA_UDMA6,
@@ -172,8 +172,7 @@ static struct ata_port_info vt6421_pport_info = {
};
static struct ata_port_info vt8251_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_SLAVE_POSS |
- ATA_FLAG_NO_LEGACY,
+ .flags = ATA_FLAG_SATA | ATA_FLAG_SLAVE_POSS,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index e079cf29ed5d..7c987371136e 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -340,8 +340,7 @@ static int __devinit vsc_sata_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
static const struct ata_port_info pi = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
- ATA_FLAG_MMIO,
+ .flags = ATA_FLAG_SATA,
.pio_mask = ATA_PIO4,
.mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 73fb1c4f4cd4..25ef1a4556e6 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -866,8 +866,9 @@ static int popen(struct atm_vcc *vcc)
}
skb = alloc_skb(sizeof(*header), GFP_ATOMIC);
- if (!skb && net_ratelimit()) {
- dev_warn(&card->dev->dev, "Failed to allocate sk_buff in popen()\n");
+ if (!skb) {
+ if (net_ratelimit())
+ dev_warn(&card->dev->dev, "Failed to allocate sk_buff in popen()\n");
return -ENOMEM;
}
header = (void *)skb_put(skb, sizeof(*header));
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5f51c3b4451e..4c5701c15f53 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -1,6 +1,6 @@
# Makefile for the Linux device tree
-obj-y := core.o sys.o bus.o dd.o \
+obj-y := core.o sys.o bus.o dd.o syscore.o \
driver.o class.o platform.o \
cpu.o firmware.o init.o map.o devres.o \
attribute_container.o transport_class.o
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index abe46edfe5b4..118c1b92a511 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,7 +1,6 @@
-obj-$(CONFIG_PM) += sysfs.o
+obj-$(CONFIG_PM) += sysfs.o generic_ops.o
obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o
obj-$(CONFIG_PM_RUNTIME) += runtime.o
-obj-$(CONFIG_PM_OPS) += generic_ops.o
obj-$(CONFIG_PM_TRACE_RTC) += trace.o
obj-$(CONFIG_PM_OPP) += opp.o
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 83404973f97a..052dc53eef38 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -423,26 +423,22 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
TRACE_DEVICE(dev);
TRACE_RESUME(0);
- if (dev->bus && dev->bus->pm) {
- pm_dev_dbg(dev, state, "EARLY ");
- error = pm_noirq_op(dev, dev->bus->pm, state);
- if (error)
- goto End;
+ if (dev->pwr_domain) {
+ pm_dev_dbg(dev, state, "EARLY power domain ");
+ pm_noirq_op(dev, &dev->pwr_domain->ops, state);
}
if (dev->type && dev->type->pm) {
pm_dev_dbg(dev, state, "EARLY type ");
error = pm_noirq_op(dev, dev->type->pm, state);
- if (error)
- goto End;
- }
-
- if (dev->class && dev->class->pm) {
+ } else if (dev->class && dev->class->pm) {
pm_dev_dbg(dev, state, "EARLY class ");
error = pm_noirq_op(dev, dev->class->pm, state);
+ } else if (dev->bus && dev->bus->pm) {
+ pm_dev_dbg(dev, state, "EARLY ");
+ error = pm_noirq_op(dev, dev->bus->pm, state);
}
-End:
TRACE_RESUME(error);
return error;
}
@@ -518,36 +514,39 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
dev->power.in_suspend = false;
- if (dev->bus) {
- if (dev->bus->pm) {
- pm_dev_dbg(dev, state, "");
- error = pm_op(dev, dev->bus->pm, state);
- } else if (dev->bus->resume) {
- pm_dev_dbg(dev, state, "legacy ");
- error = legacy_resume(dev, dev->bus->resume);
- }
- if (error)
- goto End;
+ if (dev->pwr_domain) {
+ pm_dev_dbg(dev, state, "power domain ");
+ pm_op(dev, &dev->pwr_domain->ops, state);
}
- if (dev->type) {
- if (dev->type->pm) {
- pm_dev_dbg(dev, state, "type ");
- error = pm_op(dev, dev->type->pm, state);
- }
- if (error)
- goto End;
+ if (dev->type && dev->type->pm) {
+ pm_dev_dbg(dev, state, "type ");
+ error = pm_op(dev, dev->type->pm, state);
+ goto End;
}
if (dev->class) {
if (dev->class->pm) {
pm_dev_dbg(dev, state, "class ");
error = pm_op(dev, dev->class->pm, state);
+ goto End;
} else if (dev->class->resume) {
pm_dev_dbg(dev, state, "legacy class ");
error = legacy_resume(dev, dev->class->resume);
+ goto End;
}
}
+
+ if (dev->bus) {
+ if (dev->bus->pm) {
+ pm_dev_dbg(dev, state, "");
+ error = pm_op(dev, dev->bus->pm, state);
+ } else if (dev->bus->resume) {
+ pm_dev_dbg(dev, state, "legacy ");
+ error = legacy_resume(dev, dev->bus->resume);
+ }
+ }
+
End:
device_unlock(dev);
complete_all(&dev->power.completion);
@@ -629,19 +628,23 @@ static void device_complete(struct device *dev, pm_message_t state)
{
device_lock(dev);
- if (dev->class && dev->class->pm && dev->class->pm->complete) {
- pm_dev_dbg(dev, state, "completing class ");
- dev->class->pm->complete(dev);
+ if (dev->pwr_domain && dev->pwr_domain->ops.complete) {
+ pm_dev_dbg(dev, state, "completing power domain ");
+ dev->pwr_domain->ops.complete(dev);
}
- if (dev->type && dev->type->pm && dev->type->pm->complete) {
+ if (dev->type && dev->type->pm) {
pm_dev_dbg(dev, state, "completing type ");
- dev->type->pm->complete(dev);
- }
-
- if (dev->bus && dev->bus->pm && dev->bus->pm->complete) {
+ if (dev->type->pm->complete)
+ dev->type->pm->complete(dev);
+ } else if (dev->class && dev->class->pm) {
+ pm_dev_dbg(dev, state, "completing class ");
+ if (dev->class->pm->complete)
+ dev->class->pm->complete(dev);
+ } else if (dev->bus && dev->bus->pm) {
pm_dev_dbg(dev, state, "completing ");
- dev->bus->pm->complete(dev);
+ if (dev->bus->pm->complete)
+ dev->bus->pm->complete(dev);
}
device_unlock(dev);
@@ -669,7 +672,6 @@ static void dpm_complete(pm_message_t state)
mutex_unlock(&dpm_list_mtx);
device_complete(dev, state);
- pm_runtime_put_sync(dev);
mutex_lock(&dpm_list_mtx);
put_device(dev);
@@ -727,29 +729,31 @@ static pm_message_t resume_event(pm_message_t sleep_state)
*/
static int device_suspend_noirq(struct device *dev, pm_message_t state)
{
- int error = 0;
-
- if (dev->class && dev->class->pm) {
- pm_dev_dbg(dev, state, "LATE class ");
- error = pm_noirq_op(dev, dev->class->pm, state);
- if (error)
- goto End;
- }
+ int error;
if (dev->type && dev->type->pm) {
pm_dev_dbg(dev, state, "LATE type ");
error = pm_noirq_op(dev, dev->type->pm, state);
if (error)
- goto End;
- }
-
- if (dev->bus && dev->bus->pm) {
+ return error;
+ } else if (dev->class && dev->class->pm) {
+ pm_dev_dbg(dev, state, "LATE class ");
+ error = pm_noirq_op(dev, dev->class->pm, state);
+ if (error)
+ return error;
+ } else if (dev->bus && dev->bus->pm) {
pm_dev_dbg(dev, state, "LATE ");
error = pm_noirq_op(dev, dev->bus->pm, state);
+ if (error)
+ return error;
}
-End:
- return error;
+ if (dev->pwr_domain) {
+ pm_dev_dbg(dev, state, "LATE power domain ");
+ pm_noirq_op(dev, &dev->pwr_domain->ops, state);
+ }
+
+ return 0;
}
/**
@@ -836,25 +840,22 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
goto End;
}
+ if (dev->type && dev->type->pm) {
+ pm_dev_dbg(dev, state, "type ");
+ error = pm_op(dev, dev->type->pm, state);
+ goto Domain;
+ }
+
if (dev->class) {
if (dev->class->pm) {
pm_dev_dbg(dev, state, "class ");
error = pm_op(dev, dev->class->pm, state);
+ goto Domain;
} else if (dev->class->suspend) {
pm_dev_dbg(dev, state, "legacy class ");
error = legacy_suspend(dev, state, dev->class->suspend);
+ goto Domain;
}
- if (error)
- goto End;
- }
-
- if (dev->type) {
- if (dev->type->pm) {
- pm_dev_dbg(dev, state, "type ");
- error = pm_op(dev, dev->type->pm, state);
- }
- if (error)
- goto End;
}
if (dev->bus) {
@@ -867,6 +868,12 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
}
}
+ Domain:
+ if (!error && dev->pwr_domain) {
+ pm_dev_dbg(dev, state, "power domain ");
+ pm_op(dev, &dev->pwr_domain->ops, state);
+ }
+
End:
device_unlock(dev);
complete_all(&dev->power.completion);
@@ -957,27 +964,34 @@ static int device_prepare(struct device *dev, pm_message_t state)
device_lock(dev);
- if (dev->bus && dev->bus->pm && dev->bus->pm->prepare) {
+ if (dev->type && dev->type->pm) {
+ pm_dev_dbg(dev, state, "preparing type ");
+ if (dev->type->pm->prepare)
+ error = dev->type->pm->prepare(dev);
+ suspend_report_result(dev->type->pm->prepare, error);
+ if (error)
+ goto End;
+ } else if (dev->class && dev->class->pm) {
+ pm_dev_dbg(dev, state, "preparing class ");
+ if (dev->class->pm->prepare)
+ error = dev->class->pm->prepare(dev);
+ suspend_report_result(dev->class->pm->prepare, error);
+ if (error)
+ goto End;
+ } else if (dev->bus && dev->bus->pm) {
pm_dev_dbg(dev, state, "preparing ");
- error = dev->bus->pm->prepare(dev);
+ if (dev->bus->pm->prepare)
+ error = dev->bus->pm->prepare(dev);
suspend_report_result(dev->bus->pm->prepare, error);
if (error)
goto End;
}
- if (dev->type && dev->type->pm && dev->type->pm->prepare) {
- pm_dev_dbg(dev, state, "preparing type ");
- error = dev->type->pm->prepare(dev);
- suspend_report_result(dev->type->pm->prepare, error);
- if (error)
- goto End;
+ if (dev->pwr_domain && dev->pwr_domain->ops.prepare) {
+ pm_dev_dbg(dev, state, "preparing power domain ");
+ dev->pwr_domain->ops.prepare(dev);
}
- if (dev->class && dev->class->pm && dev->class->pm->prepare) {
- pm_dev_dbg(dev, state, "preparing class ");
- error = dev->class->pm->prepare(dev);
- suspend_report_result(dev->class->pm->prepare, error);
- }
End:
device_unlock(dev);
@@ -1005,12 +1019,9 @@ static int dpm_prepare(pm_message_t state)
if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
pm_wakeup_event(dev, 0);
- if (pm_wakeup_pending()) {
- pm_runtime_put_sync(dev);
- error = -EBUSY;
- } else {
- error = device_prepare(dev, state);
- }
+ pm_runtime_put_sync(dev);
+ error = pm_wakeup_pending() ?
+ -EBUSY : device_prepare(dev, state);
mutex_lock(&dpm_list_mtx);
if (error) {
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 2bb9b4cf59d7..56a6899f5e9e 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -222,7 +222,7 @@ int opp_get_opp_count(struct device *dev)
* opp_find_freq_exact() - search for an exact frequency
* @dev: device for which we do this operation
* @freq: frequency to search for
- * @is_available: true/false - match for available opp
+ * @available: true/false - match for available opp
*
* Searches for exact match in the opp list and returns pointer to the matching
* opp if found, else returns ERR_PTR in case of error and should be handled
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 698dde742587..f2a25f18fde7 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -58,19 +58,18 @@ static inline void device_pm_move_last(struct device *dev) {}
* sysfs.c
*/
-extern int dpm_sysfs_add(struct device *);
-extern void dpm_sysfs_remove(struct device *);
-extern void rpm_sysfs_remove(struct device *);
+extern int dpm_sysfs_add(struct device *dev);
+extern void dpm_sysfs_remove(struct device *dev);
+extern void rpm_sysfs_remove(struct device *dev);
+extern int wakeup_sysfs_add(struct device *dev);
+extern void wakeup_sysfs_remove(struct device *dev);
#else /* CONFIG_PM */
-static inline int dpm_sysfs_add(struct device *dev)
-{
- return 0;
-}
-
-static inline void dpm_sysfs_remove(struct device *dev)
-{
-}
+static inline int dpm_sysfs_add(struct device *dev) { return 0; }
+static inline void dpm_sysfs_remove(struct device *dev) {}
+static inline void rpm_sysfs_remove(struct device *dev) {}
+static inline int wakeup_sysfs_add(struct device *dev) { return 0; }
+static inline void wakeup_sysfs_remove(struct device *dev) {}
#endif
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 42615b419dfb..54597c859ecb 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -168,6 +168,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
static int rpm_idle(struct device *dev, int rpmflags)
{
int (*callback)(struct device *);
+ int (*domain_callback)(struct device *);
int retval;
retval = rpm_check_suspend_allowed(dev);
@@ -213,19 +214,28 @@ static int rpm_idle(struct device *dev, int rpmflags)
dev->power.idle_notification = true;
- if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle)
- callback = dev->bus->pm->runtime_idle;
- else if (dev->type && dev->type->pm && dev->type->pm->runtime_idle)
+ if (dev->type && dev->type->pm)
callback = dev->type->pm->runtime_idle;
else if (dev->class && dev->class->pm)
callback = dev->class->pm->runtime_idle;
+ else if (dev->bus && dev->bus->pm)
+ callback = dev->bus->pm->runtime_idle;
else
callback = NULL;
- if (callback) {
+ if (dev->pwr_domain)
+ domain_callback = dev->pwr_domain->ops.runtime_idle;
+ else
+ domain_callback = NULL;
+
+ if (callback || domain_callback) {
spin_unlock_irq(&dev->power.lock);
- callback(dev);
+ if (domain_callback)
+ retval = domain_callback(dev);
+
+ if (!retval && callback)
+ callback(dev);
spin_lock_irq(&dev->power.lock);
}
@@ -372,12 +382,12 @@ static int rpm_suspend(struct device *dev, int rpmflags)
__update_runtime_status(dev, RPM_SUSPENDING);
- if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
- callback = dev->bus->pm->runtime_suspend;
- else if (dev->type && dev->type->pm && dev->type->pm->runtime_suspend)
+ if (dev->type && dev->type->pm)
callback = dev->type->pm->runtime_suspend;
else if (dev->class && dev->class->pm)
callback = dev->class->pm->runtime_suspend;
+ else if (dev->bus && dev->bus->pm)
+ callback = dev->bus->pm->runtime_suspend;
else
callback = NULL;
@@ -390,6 +400,8 @@ static int rpm_suspend(struct device *dev, int rpmflags)
else
pm_runtime_cancel_pending(dev);
} else {
+ if (dev->pwr_domain)
+ rpm_callback(dev->pwr_domain->ops.runtime_suspend, dev);
no_callback:
__update_runtime_status(dev, RPM_SUSPENDED);
pm_runtime_deactivate_timer(dev);
@@ -569,12 +581,15 @@ static int rpm_resume(struct device *dev, int rpmflags)
__update_runtime_status(dev, RPM_RESUMING);
- if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
- callback = dev->bus->pm->runtime_resume;
- else if (dev->type && dev->type->pm && dev->type->pm->runtime_resume)
+ if (dev->pwr_domain)
+ rpm_callback(dev->pwr_domain->ops.runtime_resume, dev);
+
+ if (dev->type && dev->type->pm)
callback = dev->type->pm->runtime_resume;
else if (dev->class && dev->class->pm)
callback = dev->class->pm->runtime_resume;
+ else if (dev->bus && dev->bus->pm)
+ callback = dev->bus->pm->runtime_resume;
else
callback = NULL;
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 0b1e46bf3e56..fff49bee781d 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -431,26 +431,18 @@ static ssize_t async_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(async, 0644, async_show, async_store);
#endif /* CONFIG_PM_ADVANCED_DEBUG */
-static struct attribute * power_attrs[] = {
- &dev_attr_wakeup.attr,
-#ifdef CONFIG_PM_SLEEP
- &dev_attr_wakeup_count.attr,
- &dev_attr_wakeup_active_count.attr,
- &dev_attr_wakeup_hit_count.attr,
- &dev_attr_wakeup_active.attr,
- &dev_attr_wakeup_total_time_ms.attr,
- &dev_attr_wakeup_max_time_ms.attr,
- &dev_attr_wakeup_last_time_ms.attr,
-#endif
+static struct attribute *power_attrs[] = {
#ifdef CONFIG_PM_ADVANCED_DEBUG
+#ifdef CONFIG_PM_SLEEP
&dev_attr_async.attr,
+#endif
#ifdef CONFIG_PM_RUNTIME
&dev_attr_runtime_status.attr,
&dev_attr_runtime_usage.attr,
&dev_attr_runtime_active_kids.attr,
&dev_attr_runtime_enabled.attr,
#endif
-#endif
+#endif /* CONFIG_PM_ADVANCED_DEBUG */
NULL,
};
static struct attribute_group pm_attr_group = {
@@ -458,9 +450,26 @@ static struct attribute_group pm_attr_group = {
.attrs = power_attrs,
};
-#ifdef CONFIG_PM_RUNTIME
+static struct attribute *wakeup_attrs[] = {
+#ifdef CONFIG_PM_SLEEP
+ &dev_attr_wakeup.attr,
+ &dev_attr_wakeup_count.attr,
+ &dev_attr_wakeup_active_count.attr,
+ &dev_attr_wakeup_hit_count.attr,
+ &dev_attr_wakeup_active.attr,
+ &dev_attr_wakeup_total_time_ms.attr,
+ &dev_attr_wakeup_max_time_ms.attr,
+ &dev_attr_wakeup_last_time_ms.attr,
+#endif
+ NULL,
+};
+static struct attribute_group pm_wakeup_attr_group = {
+ .name = power_group_name,
+ .attrs = wakeup_attrs,
+};
static struct attribute *runtime_attrs[] = {
+#ifdef CONFIG_PM_RUNTIME
#ifndef CONFIG_PM_ADVANCED_DEBUG
&dev_attr_runtime_status.attr,
#endif
@@ -468,6 +477,7 @@ static struct attribute *runtime_attrs[] = {
&dev_attr_runtime_suspended_time.attr,
&dev_attr_runtime_active_time.attr,
&dev_attr_autosuspend_delay_ms.attr,
+#endif /* CONFIG_PM_RUNTIME */
NULL,
};
static struct attribute_group pm_runtime_attr_group = {
@@ -480,35 +490,49 @@ int dpm_sysfs_add(struct device *dev)
int rc;
rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
- if (rc == 0 && !dev->power.no_callbacks) {
+ if (rc)
+ return rc;
+
+ if (pm_runtime_callbacks_present(dev)) {
rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group);
if (rc)
- sysfs_remove_group(&dev->kobj, &pm_attr_group);
+ goto err_out;
+ }
+
+ if (device_can_wakeup(dev)) {
+ rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
+ if (rc) {
+ if (pm_runtime_callbacks_present(dev))
+ sysfs_unmerge_group(&dev->kobj,
+ &pm_runtime_attr_group);
+ goto err_out;
+ }
}
+ return 0;
+
+ err_out:
+ sysfs_remove_group(&dev->kobj, &pm_attr_group);
return rc;
}
-void rpm_sysfs_remove(struct device *dev)
+int wakeup_sysfs_add(struct device *dev)
{
- sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
+ return sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group);
}
-void dpm_sysfs_remove(struct device *dev)
+void wakeup_sysfs_remove(struct device *dev)
{
- rpm_sysfs_remove(dev);
- sysfs_remove_group(&dev->kobj, &pm_attr_group);
+ sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
}
-#else /* CONFIG_PM_RUNTIME */
-
-int dpm_sysfs_add(struct device * dev)
+void rpm_sysfs_remove(struct device *dev)
{
- return sysfs_create_group(&dev->kobj, &pm_attr_group);
+ sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group);
}
-void dpm_sysfs_remove(struct device * dev)
+void dpm_sysfs_remove(struct device *dev)
{
+ rpm_sysfs_remove(dev);
+ sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group);
sysfs_remove_group(&dev->kobj, &pm_attr_group);
}
-
-#endif
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 9f4258df4cfd..c80e138b62fe 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -112,7 +112,7 @@ static unsigned int read_magic_time(void)
unsigned int val;
get_rtc_time(&time);
- printk("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n",
+ pr_info("Time: %2d:%02d:%02d Date: %02d/%02d/%02d\n",
time.tm_hour, time.tm_min, time.tm_sec,
time.tm_mon + 1, time.tm_mday, time.tm_year % 100);
val = time.tm_year; /* 100 years */
@@ -179,7 +179,7 @@ static int show_file_hash(unsigned int value)
unsigned int hash = hash_string(lineno, file, FILEHASH);
if (hash != value)
continue;
- printk(" hash matches %s:%u\n", file, lineno);
+ pr_info(" hash matches %s:%u\n", file, lineno);
match++;
}
return match;
@@ -255,7 +255,7 @@ static int late_resume_init(void)
val = val / FILEHASH;
dev = val /* % DEVHASH */;
- printk(" Magic number: %d:%d:%d\n", user, file, dev);
+ pr_info(" Magic number: %d:%d:%d\n", user, file, dev);
show_file_hash(file);
show_dev_hash(dev);
return 0;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 8ec406d8f548..4573c83df6dd 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -24,12 +24,26 @@
*/
bool events_check_enabled;
-/* The counter of registered wakeup events. */
-static atomic_t event_count = ATOMIC_INIT(0);
-/* A preserved old value of event_count. */
+/*
+ * Combined counters of registered wakeup events and wakeup events in progress.
+ * They need to be modified together atomically, so it's better to use one
+ * atomic variable to hold them both.
+ *
+ * The low IN_PROGRESS_BITS bits hold the number of wakeup events in progress
+ * and the remaining high bits hold the number of registered wakeup events.
+ * split_counters() reads the variable once and stores the two fields in @cnt
+ * (registered events) and @inpr (events in progress).
+ */
+static atomic_t combined_event_count = ATOMIC_INIT(0);
+
+#define IN_PROGRESS_BITS (sizeof(int) * 4)
+#define MAX_IN_PROGRESS ((1 << IN_PROGRESS_BITS) - 1)
+
+static void split_counters(unsigned int *cnt, unsigned int *inpr)
+{
+ unsigned int comb = atomic_read(&combined_event_count);
+
+ *cnt = (comb >> IN_PROGRESS_BITS);
+ *inpr = comb & MAX_IN_PROGRESS;
+}
+
+/* A preserved old value of the events counter. */
static unsigned int saved_count;
-/* The counter of wakeup events being processed. */
-static atomic_t events_in_progress = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(events_lock);
@@ -228,6 +242,35 @@ int device_wakeup_disable(struct device *dev)
EXPORT_SYMBOL_GPL(device_wakeup_disable);
/**
+ * device_set_wakeup_capable - Set/reset device wakeup capability flag.
+ * @dev: Device to handle.
+ * @capable: Whether or not @dev is capable of waking up the system from sleep.
+ *
+ * If @capable is set, set the @dev's power.can_wakeup flag and add its
+ * wakeup-related attributes to sysfs. Otherwise, unset the @dev's
+ * power.can_wakeup flag and remove its wakeup-related attributes from sysfs.
+ *
+ * Nothing is done if power.can_wakeup already matches @capable. The sysfs
+ * attributes are only touched when @dev is registered; if adding them fails,
+ * power.can_wakeup is left unchanged.
+ *
+ * This function may sleep and it can't be called from any context where
+ * sleeping is not allowed.
+ */
+void device_set_wakeup_capable(struct device *dev, bool capable)
+{
+ if (!!dev->power.can_wakeup == !!capable)
+ return;
+
+ if (device_is_registered(dev)) {
+ if (capable) {
+ if (wakeup_sysfs_add(dev))
+ return;
+ } else {
+ wakeup_sysfs_remove(dev);
+ }
+ }
+ dev->power.can_wakeup = capable;
+}
+EXPORT_SYMBOL_GPL(device_set_wakeup_capable);
+
+/**
* device_init_wakeup - Device wakeup initialization.
* @dev: Device to handle.
* @enable: Whether or not to enable @dev as a wakeup device.
@@ -307,7 +350,8 @@ static void wakeup_source_activate(struct wakeup_source *ws)
ws->timer_expires = jiffies;
ws->last_time = ktime_get();
- atomic_inc(&events_in_progress);
+ /* Increment the counter of events in progress. */
+ atomic_inc(&combined_event_count);
}
/**
@@ -394,14 +438,10 @@ static void wakeup_source_deactivate(struct wakeup_source *ws)
del_timer(&ws->timer);
/*
- * event_count has to be incremented before events_in_progress is
- * modified, so that the callers of pm_check_wakeup_events() and
- * pm_save_wakeup_count() don't see the old value of event_count and
- * events_in_progress equal to zero at the same time.
+ * Increment the counter of registered wakeup events and decrement the
+ * counter of wakeup events in progress simultaneously.
*/
- atomic_inc(&event_count);
- smp_mb__before_atomic_dec();
- atomic_dec(&events_in_progress);
+ atomic_add(MAX_IN_PROGRESS, &combined_event_count);
}
/**
@@ -556,8 +596,10 @@ bool pm_wakeup_pending(void)
spin_lock_irqsave(&events_lock, flags);
if (events_check_enabled) {
- ret = ((unsigned int)atomic_read(&event_count) != saved_count)
- || atomic_read(&events_in_progress);
+ unsigned int cnt, inpr;
+
+ split_counters(&cnt, &inpr);
+ ret = (cnt != saved_count || inpr > 0);
events_check_enabled = !ret;
}
spin_unlock_irqrestore(&events_lock, flags);
@@ -573,25 +615,25 @@ bool pm_wakeup_pending(void)
* Store the number of registered wakeup events at the address in @count. Block
* if the current number of wakeup events being processed is nonzero.
*
- * Return false if the wait for the number of wakeup events being processed to
+ * Return 'false' if the wait for the number of wakeup events being processed to
* drop down to zero has been interrupted by a signal (and the current number
- * of wakeup events being processed is still nonzero). Otherwise return true.
+ * of wakeup events being processed is still nonzero). Otherwise return 'true'.
*/
bool pm_get_wakeup_count(unsigned int *count)
{
- bool ret;
-
- if (capable(CAP_SYS_ADMIN))
- events_check_enabled = false;
+ unsigned int cnt, inpr;
- while (atomic_read(&events_in_progress) && !signal_pending(current)) {
+ for (;;) {
+ split_counters(&cnt, &inpr);
+ if (inpr == 0 || signal_pending(current))
+ break;
pm_wakeup_update_hit_counts();
schedule_timeout_interruptible(msecs_to_jiffies(TIMEOUT));
}
- ret = !atomic_read(&events_in_progress);
- *count = atomic_read(&event_count);
- return ret;
+ split_counters(&cnt, &inpr);
+ *count = cnt;
+ return !inpr;
}
/**
@@ -600,24 +642,25 @@ bool pm_get_wakeup_count(unsigned int *count)
*
* If @count is equal to the current number of registered wakeup events and the
* current number of wakeup events being processed is zero, store @count as the
- * old number of registered wakeup events to be used by pm_check_wakeup_events()
- * and return true. Otherwise return false.
+ * old number of registered wakeup events for pm_wakeup_pending(), enable
+ * wakeup events detection and return 'true'. Otherwise disable wakeup events
+ * detection and return 'false'.
*/
bool pm_save_wakeup_count(unsigned int count)
{
- bool ret = false;
+ unsigned int cnt, inpr;
+ events_check_enabled = false;
spin_lock_irq(&events_lock);
- if (count == (unsigned int)atomic_read(&event_count)
- && !atomic_read(&events_in_progress)) {
+ split_counters(&cnt, &inpr);
+ if (cnt == count && inpr == 0) {
saved_count = count;
events_check_enabled = true;
- ret = true;
}
spin_unlock_irq(&events_lock);
- if (!ret)
+ if (!events_check_enabled)
pm_wakeup_update_hit_counts();
- return ret;
+ return events_check_enabled;
}
static struct dentry *wakeup_sources_stats_dentry;
diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c
new file mode 100644
index 000000000000..90af2943f9e4
--- /dev/null
+++ b/drivers/base/syscore.c
@@ -0,0 +1,117 @@
+/*
+ * syscore.c - Execution of system core operations.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/syscore_ops.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+
+static LIST_HEAD(syscore_ops_list);
+static DEFINE_MUTEX(syscore_ops_lock);
+
+/**
+ * register_syscore_ops - Register a set of system core operations.
+ * @ops: System core operations to register.
+ *
+ * @ops is appended to the tail of syscore_ops_list with syscore_ops_lock held.
+ */
+void register_syscore_ops(struct syscore_ops *ops)
+{
+ mutex_lock(&syscore_ops_lock);
+ list_add_tail(&ops->node, &syscore_ops_list);
+ mutex_unlock(&syscore_ops_lock);
+}
+EXPORT_SYMBOL_GPL(register_syscore_ops);
+
+/**
+ * unregister_syscore_ops - Unregister a set of system core operations.
+ * @ops: System core operations to unregister.
+ *
+ * @ops is removed from syscore_ops_list with syscore_ops_lock held.
+ * NOTE(review): no check is made that @ops is currently on the list;
+ * presumably it must have been registered earlier - confirm with callers.
+ */
+void unregister_syscore_ops(struct syscore_ops *ops)
+{
+ mutex_lock(&syscore_ops_lock);
+ list_del(&ops->node);
+ mutex_unlock(&syscore_ops_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_syscore_ops);
+
+#ifdef CONFIG_PM_SLEEP
+/**
+ * syscore_suspend - Execute all the registered system core suspend callbacks.
+ *
+ * This function is executed with one CPU on-line and disabled interrupts.
+ *
+ * The list is walked from tail to head, so the callbacks run in the reverse
+ * of the order in which they were added to the list. Entries without a
+ * suspend callback are skipped. The list is walked without taking
+ * syscore_ops_lock.
+ *
+ * If a callback returns a nonzero value, the walk stops, the resume callbacks
+ * of the entries that follow the failing one in the list (i.e. the entries
+ * already visited) are executed and that value is returned. The resume
+ * callback of the failing entry itself is not called.
+ *
+ * Returns 0 if no suspend callback reported an error.
+ */
+int syscore_suspend(void)
+{
+ struct syscore_ops *ops;
+ int ret = 0;
+
+ WARN_ONCE(!irqs_disabled(),
+ "Interrupts enabled before system core suspend.\n");
+
+ list_for_each_entry_reverse(ops, &syscore_ops_list, node)
+ if (ops->suspend) {
+ if (initcall_debug)
+ pr_info("PM: Calling %pF\n", ops->suspend);
+ ret = ops->suspend();
+ if (ret)
+ goto err_out;
+ WARN_ONCE(!irqs_disabled(),
+ "Interrupts enabled after %pF\n", ops->suspend);
+ }
+
+ return 0;
+
+ err_out:
+ pr_err("PM: System core suspend callback %pF failed.\n", ops->suspend);
+
+ list_for_each_entry_continue(ops, &syscore_ops_list, node)
+ if (ops->resume)
+ ops->resume();
+
+ return ret;
+}
+
+/**
+ * syscore_resume - Execute all the registered system core resume callbacks.
+ *
+ * This function is executed with one CPU on-line and disabled interrupts.
+ *
+ * The list is walked from head to tail, which is the reverse of the order
+ * used by syscore_suspend(). Entries without a resume callback are skipped.
+ * After each callback a warning is issued once if interrupts are found to be
+ * enabled. The list is walked without taking syscore_ops_lock.
+ */
+void syscore_resume(void)
+{
+ struct syscore_ops *ops;
+
+ WARN_ONCE(!irqs_disabled(),
+ "Interrupts enabled before system core resume.\n");
+
+ list_for_each_entry(ops, &syscore_ops_list, node)
+ if (ops->resume) {
+ if (initcall_debug)
+ pr_info("PM: Calling %pF\n", ops->resume);
+ ops->resume();
+ WARN_ONCE(!irqs_disabled(),
+ "Interrupts enabled after %pF\n", ops->resume);
+ }
+}
+#endif /* CONFIG_PM_SLEEP */
+
+/**
+ * syscore_shutdown - Execute all the registered system core shutdown callbacks.
+ *
+ * The list is walked from tail to head with syscore_ops_lock held for the
+ * whole walk. Entries without a shutdown callback are skipped.
+ */
+void syscore_shutdown(void)
+{
+ struct syscore_ops *ops;
+
+ mutex_lock(&syscore_ops_lock);
+
+ list_for_each_entry_reverse(ops, &syscore_ops_list, node)
+ if (ops->shutdown) {
+ if (initcall_debug)
+ pr_info("PM: Calling %pF\n", ops->shutdown);
+ ops->shutdown();
+ }
+
+ mutex_unlock(&syscore_ops_lock);
+}
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b9ba04fc2b34..77fc76f8aea9 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3281,7 +3281,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
struct block_device *bdev = opened_bdev[cnt];
if (!bdev || ITYPE(drive_state[cnt].fd_device) != type)
continue;
- __invalidate_device(bdev);
+ __invalidate_device(bdev, true);
}
mutex_unlock(&open_lock);
} else {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 49e6a545eb63..dbf31ec9114d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -78,7 +78,6 @@
#include <asm/uaccess.h>
-static DEFINE_MUTEX(loop_mutex);
static LIST_HEAD(loop_devices);
static DEFINE_MUTEX(loop_devices_mutex);
@@ -1501,11 +1500,9 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
{
struct loop_device *lo = bdev->bd_disk->private_data;
- mutex_lock(&loop_mutex);
mutex_lock(&lo->lo_ctl_mutex);
lo->lo_refcnt++;
mutex_unlock(&lo->lo_ctl_mutex);
- mutex_unlock(&loop_mutex);
return 0;
}
@@ -1515,7 +1512,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
struct loop_device *lo = disk->private_data;
int err;
- mutex_lock(&loop_mutex);
mutex_lock(&lo->lo_ctl_mutex);
if (--lo->lo_refcnt)
@@ -1540,7 +1536,6 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
out:
mutex_unlock(&lo->lo_ctl_mutex);
out_unlocked:
- mutex_unlock(&loop_mutex);
return 0;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d7aa39e349a6..9cb8668ff5f4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -120,6 +120,10 @@ static DEFINE_SPINLOCK(minor_lock);
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
+#define EMULATED_HD_DISK_MINOR_OFFSET (0)
+#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
+#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16))
+#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4)
#define DEV_NAME "xvd" /* name in /dev */
@@ -281,7 +285,7 @@ static int blkif_queue_request(struct request *req)
info->shadow[id].request = req;
ring_req->id = id;
- ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
+ ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
ring_req->handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
@@ -317,7 +321,7 @@ static int blkif_queue_request(struct request *req)
rq_data_dir(req) );
info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
- ring_req->seg[i] =
+ ring_req->u.rw.seg[i] =
(struct blkif_request_segment) {
.gref = ref,
.first_sect = fsect,
@@ -434,6 +438,65 @@ static void xlvbd_flush(struct blkfront_info *info)
info->feature_flush ? "enabled" : "disabled");
}
+/*
+ * xen_translate_vdev - Map a non-extended vdevice number to an xvd minor
+ * number and a disk name offset.
+ * @vdevice: Virtual device number; its major and minor parts are extracted
+ * with BLKIF_MAJOR() and BLKIF_MINOR().
+ * @minor: Output, the minor number to use for the device.
+ * @offset: Output, the disk index the caller uses to build the device name.
+ *
+ * Emulated IDE (XEN_IDE0_MAJOR, XEN_IDE1_MAJOR) and SCSI
+ * (XEN_SCSI_DISK0_MAJOR ... XEN_SCSI_DISK15_MAJOR) majors are remapped using
+ * the EMULATED_HD_* and EMULATED_SD_* offsets. For XENVBD_MAJOR the minor is
+ * passed through unchanged.
+ *
+ * Returns 0 on success, or -ENODEV (after printing a warning) if the major
+ * number is none of the above; in that case *offset is not written.
+ */
+static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
+{
+ int major;
+ major = BLKIF_MAJOR(vdevice);
+ *minor = BLKIF_MINOR(vdevice);
+ switch (major) {
+ case XEN_IDE0_MAJOR:
+ *offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = ((*minor / 64) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_IDE1_MAJOR:
+ *offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
+ *minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
+ EMULATED_HD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK0_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK1_MAJOR:
+ case XEN_SCSI_DISK2_MAJOR:
+ case XEN_SCSI_DISK3_MAJOR:
+ case XEN_SCSI_DISK4_MAJOR:
+ case XEN_SCSI_DISK5_MAJOR:
+ case XEN_SCSI_DISK6_MAJOR:
+ case XEN_SCSI_DISK7_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XEN_SCSI_DISK8_MAJOR:
+ case XEN_SCSI_DISK9_MAJOR:
+ case XEN_SCSI_DISK10_MAJOR:
+ case XEN_SCSI_DISK11_MAJOR:
+ case XEN_SCSI_DISK12_MAJOR:
+ case XEN_SCSI_DISK13_MAJOR:
+ case XEN_SCSI_DISK14_MAJOR:
+ case XEN_SCSI_DISK15_MAJOR:
+ *offset = (*minor / PARTS_PER_DISK) +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
+ EMULATED_SD_DISK_NAME_OFFSET;
+ *minor = *minor +
+ ((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
+ EMULATED_SD_DISK_MINOR_OFFSET;
+ break;
+ case XENVBD_MAJOR:
+ *offset = *minor / PARTS_PER_DISK;
+ break;
+ default:
+ printk(KERN_WARNING "blkfront: your disk configuration is "
+ "incorrect, please use an xvd device instead\n");
+ return -ENODEV;
+ }
+ return 0;
+}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
struct blkfront_info *info,
@@ -441,7 +504,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
{
struct gendisk *gd;
int nr_minors = 1;
- int err = -ENODEV;
+ int err;
unsigned int offset;
int minor;
int nr_parts;
@@ -456,12 +519,20 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
}
if (!VDEV_IS_EXTENDED(info->vdevice)) {
- minor = BLKIF_MINOR(info->vdevice);
- nr_parts = PARTS_PER_DISK;
+ err = xen_translate_vdev(info->vdevice, &minor, &offset);
+ if (err)
+ return err;
+ nr_parts = PARTS_PER_DISK;
} else {
minor = BLKIF_MINOR_EXT(info->vdevice);
nr_parts = PARTS_PER_EXT_DISK;
+ offset = minor / nr_parts;
+ if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4)
+ printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
+ "emulated IDE disks,\n\t choose an xvd device name"
+ "from xvde on\n", info->vdevice);
}
+ err = -ENODEV;
if ((minor % nr_parts) == 0)
nr_minors = nr_parts;
@@ -475,8 +546,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (gd == NULL)
goto release;
- offset = minor / nr_parts;
-
if (nr_minors > 1) {
if (offset < 26)
sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
@@ -615,7 +684,7 @@ static void blkif_completion(struct blk_shadow *s)
{
int i;
for (i = 0; i < s->req.nr_segments; i++)
- gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+ gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -932,7 +1001,7 @@ static int blkif_recover(struct blkfront_info *info)
/* Rewrite any grant references invalidated by susp/resume. */
for (j = 0; j < req->nr_segments; j++)
gnttab_grant_foreign_access_ref(
- req->seg[j].gref,
+ req->u.rw.seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
rq_data_dir(info->shadow[req->id].request));
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index a126e614601f..6dcd55a74c0a 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -39,6 +39,11 @@ static struct usb_device_id ath3k_table[] = {
/* Atheros AR3011 with sflash firmware*/
{ USB_DEVICE(0x0CF3, 0x3002) },
+ /* Atheros AR9285 Malbec with sflash firmware */
+ { USB_DEVICE(0x03F0, 0x311D) },
+
+ /* Atheros AR5BBU12 with sflash firmware */
+ { USB_DEVICE(0x0489, 0xE02C) },
{ } /* Terminating entry */
};
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 1da773f899a2..700a3840fddc 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -102,6 +102,12 @@ static struct usb_device_id blacklist_table[] = {
/* Atheros 3011 with sflash firmware */
{ USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE },
+ /* Atheros AR9285 Malbec with sflash firmware */
+ { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE },
+
+ /* Atheros AR5BBU12 with sflash firmware */
+ { USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
+
/* Broadcom BCM2035 */
{ USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU },
{ USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU },
@@ -826,7 +832,7 @@ static void btusb_work(struct work_struct *work)
if (hdev->conn_hash.sco_num > 0) {
if (!test_bit(BTUSB_DID_ISO_RESUME, &data->flags)) {
- err = usb_autopm_get_interface(data->isoc);
+ err = usb_autopm_get_interface(data->isoc ? data->isoc : data->intf);
if (err < 0) {
clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
usb_kill_anchored_urbs(&data->isoc_anchor);
@@ -855,7 +861,7 @@ static void btusb_work(struct work_struct *work)
__set_isoc_interface(hdev, 0);
if (test_and_clear_bit(BTUSB_DID_ISO_RESUME, &data->flags))
- usb_autopm_put_interface(data->isoc);
+ usb_autopm_put_interface(data->isoc ? data->isoc : data->intf);
}
}
@@ -1038,8 +1044,6 @@ static int btusb_probe(struct usb_interface *intf,
usb_set_intfdata(intf, data);
- usb_enable_autosuspend(interface_to_usbdev(intf));
-
return 0;
}
diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 9252e85706ef..780498d76581 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -773,18 +773,23 @@ int __init agp_amd64_init(void)
#else
printk(KERN_INFO PFX "You can boot with agp=try_unsupported\n");
#endif
+ pci_unregister_driver(&agp_amd64_pci_driver);
return -ENODEV;
}
/* First check that we have at least one AMD64 NB */
- if (!pci_dev_present(amd_nb_misc_ids))
+ if (!pci_dev_present(amd_nb_misc_ids)) {
+ pci_unregister_driver(&agp_amd64_pci_driver);
return -ENODEV;
+ }
/* Look for any AGP bridge */
agp_amd64_pci_driver.id_table = agp_amd64_pci_promisc_table;
err = driver_attach(&agp_amd64_pci_driver.driver);
- if (err == 0 && agp_bridges_found == 0)
+ if (err == 0 && agp_bridges_found == 0) {
+ pci_unregister_driver(&agp_amd64_pci_driver);
err = -ENODEV;
+ }
}
return err;
}
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index c195bfeade11..5feebe2800e9 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -130,6 +130,7 @@
#define INTEL_GMCH_GMS_STOLEN_352M (0xd << 4)
#define I915_IFPADDR 0x60
+#define I830_HIC 0x70
/* Intel 965G registers */
#define I965_MSAC 0x62
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index fab3d3265adb..0d09b537bb9a 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/agp_backend.h>
+#include <linux/delay.h>
#include <asm/smp.h>
#include "agp.h"
#include "intel-agp.h"
@@ -70,12 +71,8 @@ static struct _intel_private {
u32 __iomem *gtt; /* I915G */
bool clear_fake_agp; /* on first access via agp, fill with scratch */
int num_dcache_entries;
- union {
- void __iomem *i9xx_flush_page;
- void *i8xx_flush_page;
- };
+ void __iomem *i9xx_flush_page;
char *i81x_gtt_table;
- struct page *i8xx_page;
struct resource ifp_resource;
int resource_valid;
struct page *scratch_page;
@@ -722,28 +719,6 @@ static int intel_fake_agp_fetch_size(void)
static void i830_cleanup(void)
{
- if (intel_private.i8xx_flush_page) {
- kunmap(intel_private.i8xx_flush_page);
- intel_private.i8xx_flush_page = NULL;
- }
-
- __free_page(intel_private.i8xx_page);
- intel_private.i8xx_page = NULL;
-}
-
-static void intel_i830_setup_flush(void)
-{
- /* return if we've already set the flush mechanism up */
- if (intel_private.i8xx_page)
- return;
-
- intel_private.i8xx_page = alloc_page(GFP_KERNEL);
- if (!intel_private.i8xx_page)
- return;
-
- intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
- if (!intel_private.i8xx_flush_page)
- i830_cleanup();
}
/* The chipset_flush interface needs to get data that has already been
@@ -758,14 +733,27 @@ static void intel_i830_setup_flush(void)
*/
static void i830_chipset_flush(void)
{
- unsigned int *pg = intel_private.i8xx_flush_page;
+ unsigned long timeout = jiffies + msecs_to_jiffies(1000);
+
+ /* Forcibly evict everything from the CPU write buffers.
+ * clflush appears to be insufficient.
+ */
+ wbinvd_on_all_cpus();
+
+ /* Now we've only seen documents for this magic bit on 855GM,
+ * we hope it exists for the other gen2 chipsets...
+ *
+ * Also works as advertised on my 845G.
+ */
+ writel(readl(intel_private.registers+I830_HIC) | (1<<31),
+ intel_private.registers+I830_HIC);
- memset(pg, 0, 1024);
+ while (readl(intel_private.registers+I830_HIC) & (1<<31)) {
+ if (time_after(jiffies, timeout))
+ break;
- if (cpu_has_clflush)
- clflush_cache_range(pg, 1024);
- else if (wbinvd_on_all_cpus() != 0)
- printk(KERN_ERR "Timed out waiting for cache flush.\n");
+ udelay(50);
+ }
}
static void i830_write_entry(dma_addr_t addr, unsigned int entry,
@@ -849,8 +837,6 @@ static int i830_setup(void)
intel_private.gtt_bus_addr = reg_addr + I810_PTE_BASE;
- intel_i830_setup_flush();
-
return 0;
}
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index d31483c54883..beecd1cf9b99 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -198,3 +198,15 @@ config HW_RANDOM_NOMADIK
module will be called nomadik-rng.
If unsure, say Y.
+
+config HW_RANDOM_PICOXCELL
+ tristate "Picochip picoXcell true random number generator support"
+ depends on HW_RANDOM && ARCH_PICOXCELL && PICOXCELL_PC3X3
+ ---help---
+ This driver provides kernel-side support for the Random Number
+ Generator hardware found on Picochip PC3x3 and later devices.
+
+ To compile this driver as a module, choose M here: the
+ module will be called picoxcell-rng.
+
+ If unsure, say Y.
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 4273308aa1e3..3db4eb8b19c0 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_HW_RANDOM_TX4939) += tx4939-rng.o
obj-$(CONFIG_HW_RANDOM_MXC_RNGA) += mxc-rnga.o
obj-$(CONFIG_HW_RANDOM_OCTEON) += octeon-rng.o
obj-$(CONFIG_HW_RANDOM_NOMADIK) += nomadik-rng.o
+obj-$(CONFIG_HW_RANDOM_PICOXCELL) += picoxcell-rng.o
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index 06aad0831c73..2cc755a64302 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -91,7 +91,7 @@ static struct hwrng omap_rng_ops = {
static int __devinit omap_rng_probe(struct platform_device *pdev)
{
- struct resource *res, *mem;
+ struct resource *res;
int ret;
/*
@@ -116,14 +116,12 @@ static int __devinit omap_rng_probe(struct platform_device *pdev)
if (!res)
return -ENOENT;
- mem = request_mem_region(res->start, resource_size(res),
- pdev->name);
- if (mem == NULL) {
+ if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
ret = -EBUSY;
goto err_region;
}
- dev_set_drvdata(&pdev->dev, mem);
+ dev_set_drvdata(&pdev->dev, res);
rng_base = ioremap(res->start, resource_size(res));
if (!rng_base) {
ret = -ENOMEM;
@@ -146,7 +144,7 @@ err_register:
iounmap(rng_base);
rng_base = NULL;
err_ioremap:
- release_resource(mem);
+ release_mem_region(res->start, resource_size(res));
err_region:
if (cpu_is_omap24xx()) {
clk_disable(rng_ick);
@@ -157,7 +155,7 @@ err_region:
static int __exit omap_rng_remove(struct platform_device *pdev)
{
- struct resource *mem = dev_get_drvdata(&pdev->dev);
+ struct resource *res = dev_get_drvdata(&pdev->dev);
hwrng_unregister(&omap_rng_ops);
@@ -170,7 +168,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev)
clk_put(rng_ick);
}
- release_resource(mem);
+ release_mem_region(res->start, resource_size(res));
rng_base = NULL;
return 0;
diff --git a/drivers/char/hw_random/picoxcell-rng.c b/drivers/char/hw_random/picoxcell-rng.c
new file mode 100644
index 000000000000..990d55a5e3e8
--- /dev/null
+++ b/drivers/char/hw_random/picoxcell-rng.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2010-2011 Picochip Ltd., Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * All enquiries to support@picochip.com
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/hw_random.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#define DATA_REG_OFFSET 0x0200
+#define CSR_REG_OFFSET 0x0278
+#define CSR_OUT_EMPTY_MASK (1 << 24)
+#define CSR_FAULT_MASK (1 << 1)
+#define TRNG_BLOCK_RESET_MASK (1 << 0)
+#define TAI_REG_OFFSET 0x0380
+
+/*
+ * The maximum amount of time in microseconds to spend waiting for data if the
+ * core wants us to wait. The TRNG should generate 32 bits every 320ns so a
+ * timeout of 20us seems reasonable. The TRNG does builtin tests of the data
+ * for randomness so we can't always assume there is data present.
+ */
+#define PICO_TRNG_TIMEOUT 20
+
+static void __iomem *rng_base;	/* ioremapped TRNG register window */
+static struct clk *rng_clk;	/* TRNG clock, obtained and enabled in probe */
+static struct device *rng_dev;	/* device the read callback logs faults against */
+
+static inline u32 picoxcell_trng_read_csr(void)
+{
+ return __raw_readl(rng_base + CSR_REG_OFFSET);
+}
+
+static inline bool picoxcell_trng_is_empty(void)
+{
+ return picoxcell_trng_read_csr() & CSR_OUT_EMPTY_MASK;
+}
+
+/*
+ * Take the random number generator out of reset and make sure the interrupts
+ * are masked. We shouldn't need to get large amounts of random bytes so just
+ * poll the status register. The hardware generates 32 bits every 320ns so we
+ * shouldn't have to wait long enough to warrant waiting for an IRQ.
+ */
+static void picoxcell_trng_start(void)
+{
+ __raw_writel(0, rng_base + TAI_REG_OFFSET);
+ __raw_writel(0, rng_base + CSR_REG_OFFSET);
+}
+
+static void picoxcell_trng_reset(void)
+{
+ __raw_writel(TRNG_BLOCK_RESET_MASK, rng_base + CSR_REG_OFFSET);
+ __raw_writel(TRNG_BLOCK_RESET_MASK, rng_base + TAI_REG_OFFSET);
+ picoxcell_trng_start();
+}
+
+/*
+ * Get some random data from the random number generator. The hw_random core
+ * layer provides us with locking.
+ */
+static int picoxcell_trng_read(struct hwrng *rng, void *buf, size_t max,
+ bool wait)
+{
+ int i;
+
+ /* Wait for some data to become available. */
+ for (i = 0; i < PICO_TRNG_TIMEOUT && picoxcell_trng_is_empty(); ++i) {
+ if (!wait)
+ return 0;
+
+ udelay(1);
+ }
+
+ if (picoxcell_trng_read_csr() & CSR_FAULT_MASK) {
+ dev_err(rng_dev, "fault detected, resetting TRNG\n");
+ picoxcell_trng_reset();
+ return -EIO;
+ }
+
+ if (i == PICO_TRNG_TIMEOUT)
+ return 0;
+
+ *(u32 *)buf = __raw_readl(rng_base + DATA_REG_OFFSET);
+ return sizeof(u32);
+}
+
+static struct hwrng picoxcell_trng = {
+ .name = "picoxcell",
+ .read = picoxcell_trng_read,
+};
+
+/* Probe: map the TRNG registers, enable its clock and register with hwrng. */
+static int picoxcell_trng_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!mem) {
+		dev_warn(&pdev->dev, "no memory resource\n");
+		return -ENOMEM;
+	}
+
+	if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
+				     "picoxcell_trng")) {
+		dev_warn(&pdev->dev, "unable to request io mem\n");
+		return -EBUSY;
+	}
+
+	rng_base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+	if (!rng_base) {
+		dev_warn(&pdev->dev, "unable to remap io mem\n");
+		return -ENOMEM;
+	}
+
+	rng_clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(rng_clk)) {
+		dev_warn(&pdev->dev, "no clk\n");
+		return PTR_ERR(rng_clk);
+	}
+
+	ret = clk_enable(rng_clk);
+	if (ret) {
+		dev_warn(&pdev->dev, "unable to enable clk\n");
+		goto err_enable;
+	}
+	/* Set rng_dev before registering: the read callback logs through it. */
+	rng_dev = &pdev->dev;
+	picoxcell_trng_start();
+	ret = hwrng_register(&picoxcell_trng);
+	if (ret)
+		goto err_register;
+
+	dev_info(&pdev->dev, "picoxcell random number generator active\n");
+	return 0;
+
+err_register:
+	rng_dev = NULL;
+	clk_disable(rng_clk);
+err_enable:
+	clk_put(rng_clk);
+	return ret;
+}
+
+static int __devexit picoxcell_trng_remove(struct platform_device *pdev)
+{
+ hwrng_unregister(&picoxcell_trng);
+ clk_disable(rng_clk);
+ clk_put(rng_clk);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int picoxcell_trng_suspend(struct device *dev)
+{
+ clk_disable(rng_clk);
+
+ return 0;
+}
+
+static int picoxcell_trng_resume(struct device *dev)
+{
+ return clk_enable(rng_clk);
+}
+
+static const struct dev_pm_ops picoxcell_trng_pm_ops = {
+ .suspend = picoxcell_trng_suspend,
+ .resume = picoxcell_trng_resume,
+};
+#endif /* CONFIG_PM */
+
+static struct platform_driver picoxcell_trng_driver = {
+ .probe = picoxcell_trng_probe,
+ .remove = __devexit_p(picoxcell_trng_remove),
+ .driver = {
+ .name = "picoxcell-trng",
+ .owner = THIS_MODULE,
+#ifdef CONFIG_PM
+ .pm = &picoxcell_trng_pm_ops,
+#endif /* CONFIG_PM */
+ },
+};
+
+static int __init picoxcell_trng_init(void)
+{
+ return platform_driver_register(&picoxcell_trng_driver);
+}
+module_init(picoxcell_trng_init);
+
+static void __exit picoxcell_trng_exit(void)
+{
+ platform_driver_unregister(&picoxcell_trng_driver);
+}
+module_exit(picoxcell_trng_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jamie Iles");
+MODULE_DESCRIPTION("Picochip picoXcell TRNG driver");
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 7855f9f45b8e..62787e30d508 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -900,6 +900,14 @@ static void sender(void *send_info,
printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
#endif
+ /*
+ * last_timeout_jiffies is updated here to avoid
+ * smi_timeout() handler passing very large time_diff
+ * value to smi_event_handler() that causes
+ * the send command to abort.
+ */
+ smi_info->last_timeout_jiffies = jiffies;
+
mod_timer(&smi_info->si_timer, jiffies + SI_TIMEOUT_JIFFIES);
if (smi_info->thread)
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d75627c6c8..33dc2298af73 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -53,6 +53,8 @@ MODULE_LICENSE("GPL");
#define RTC_BITS 55 /* 55 bits for this implementation */
+static struct k_clock sgi_clock;
+
extern unsigned long sn_rtc_cycles_per_second;
#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC))
@@ -487,7 +489,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
return 0;
};
-static int sgi_clock_set(clockid_t clockid, struct timespec *tp)
+static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
{
u64 nsec;
@@ -763,15 +765,21 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
return err;
}
+static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
+{
+ tp->tv_sec = 0;
+ tp->tv_nsec = sgi_clock_period;
+ return 0;
+}
+
static struct k_clock sgi_clock = {
- .res = 0,
- .clock_set = sgi_clock_set,
- .clock_get = sgi_clock_get,
- .timer_create = sgi_timer_create,
- .nsleep = do_posix_clock_nonanosleep,
- .timer_set = sgi_timer_set,
- .timer_del = sgi_timer_del,
- .timer_get = sgi_timer_get
+ .clock_set = sgi_clock_set,
+ .clock_get = sgi_clock_get,
+ .clock_getres = sgi_clock_getres,
+ .timer_create = sgi_timer_create,
+ .timer_set = sgi_timer_set,
+ .timer_del = sgi_timer_del,
+ .timer_get = sgi_timer_get
};
/**
@@ -831,8 +839,8 @@ static int __init mmtimer_init(void)
(unsigned long) node);
}
- sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second;
- register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock);
+ sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second;
+ posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock);
printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION,
sn_rtc_cycles_per_second/(unsigned long)1E6);
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 777181a2e603..bcbbc71febb7 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -830,8 +830,7 @@ static void monitor_card(unsigned long p)
test_bit(IS_ANY_T1, &dev->flags))) {
DEBUGP(4, dev, "Perform AUTOPPS\n");
set_bit(IS_AUTOPPS_ACT, &dev->flags);
- ptsreq.protocol = ptsreq.protocol =
- (0x01 << dev->proto);
+ ptsreq.protocol = (0x01 << dev->proto);
ptsreq.flags = 0x01;
ptsreq.pts1 = 0x00;
ptsreq.pts2 = 0x00;
diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c
index 94b8eb4d691d..444155a305ae 100644
--- a/drivers/char/pcmcia/ipwireless/main.c
+++ b/drivers/char/pcmcia/ipwireless/main.c
@@ -78,7 +78,6 @@ static void signalled_reboot_callback(void *callback_data)
static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
{
struct ipw_dev *ipw = priv_data;
- struct resource *io_resource;
int ret;
p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
@@ -92,9 +91,12 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
if (ret)
return ret;
- io_resource = request_region(p_dev->resource[0]->start,
- resource_size(p_dev->resource[0]),
- IPWIRELESS_PCCARD_NAME);
+ if (!request_region(p_dev->resource[0]->start,
+ resource_size(p_dev->resource[0]),
+ IPWIRELESS_PCCARD_NAME)) {
+ ret = -EBUSY;
+ goto exit;
+ }
p_dev->resource[2]->flags |=
WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE;
@@ -105,22 +107,25 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
ret = pcmcia_map_mem_page(p_dev, p_dev->resource[2], p_dev->card_addr);
if (ret != 0)
- goto exit2;
+ goto exit1;
ipw->is_v2_card = resource_size(p_dev->resource[2]) == 0x100;
- ipw->attr_memory = ioremap(p_dev->resource[2]->start,
+ ipw->common_memory = ioremap(p_dev->resource[2]->start,
resource_size(p_dev->resource[2]));
- request_mem_region(p_dev->resource[2]->start,
- resource_size(p_dev->resource[2]),
- IPWIRELESS_PCCARD_NAME);
+ if (!request_mem_region(p_dev->resource[2]->start,
+ resource_size(p_dev->resource[2]),
+ IPWIRELESS_PCCARD_NAME)) {
+ ret = -EBUSY;
+ goto exit2;
+ }
p_dev->resource[3]->flags |= WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM |
WIN_ENABLE;
p_dev->resource[3]->end = 0; /* this used to be 0x1000 */
ret = pcmcia_request_window(p_dev, p_dev->resource[3], 0);
if (ret != 0)
- goto exit2;
+ goto exit3;
ret = pcmcia_map_mem_page(p_dev, p_dev->resource[3], 0);
if (ret != 0)
@@ -128,23 +133,28 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, void *priv_data)
ipw->attr_memory = ioremap(p_dev->resource[3]->start,
resource_size(p_dev->resource[3]));
- request_mem_region(p_dev->resource[3]->start,
- resource_size(p_dev->resource[3]),
- IPWIRELESS_PCCARD_NAME);
+ if (!request_mem_region(p_dev->resource[3]->start,
+ resource_size(p_dev->resource[3]),
+ IPWIRELESS_PCCARD_NAME)) {
+ ret = -EBUSY;
+ goto exit4;
+ }
return 0;
+exit4:
+ iounmap(ipw->attr_memory);
exit3:
+ release_mem_region(p_dev->resource[2]->start,
+ resource_size(p_dev->resource[2]));
exit2:
- if (ipw->common_memory) {
- release_mem_region(p_dev->resource[2]->start,
- resource_size(p_dev->resource[2]));
- iounmap(ipw->common_memory);
- }
+ iounmap(ipw->common_memory);
exit1:
- release_resource(io_resource);
+ release_region(p_dev->resource[0]->start,
+ resource_size(p_dev->resource[0]));
+exit:
pcmcia_disable_device(p_dev);
- return -1;
+ return ret;
}
static int config_ipwireless(struct ipw_dev *ipw)
@@ -219,6 +229,8 @@ exit:
static void release_ipwireless(struct ipw_dev *ipw)
{
+ release_region(ipw->link->resource[0]->start,
+ resource_size(ipw->link->resource[0]));
if (ipw->common_memory) {
release_mem_region(ipw->link->resource[2]->start,
resource_size(ipw->link->resource[2]));
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 72a4fcb17745..5e29e8031bbc 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -128,6 +128,7 @@
* void add_input_randomness(unsigned int type, unsigned int code,
* unsigned int value);
* void add_interrupt_randomness(int irq);
+ * void add_disk_randomness(struct gendisk *disk);
*
* add_input_randomness() uses the input layer interrupt timing, as well as
* the event type information from the hardware.
@@ -136,9 +137,15 @@
* inputs to the entropy pool. Note that not all interrupts are good
* sources of randomness! For example, the timer interrupts is not a
* good choice, because the periodicity of the interrupts is too
- * regular, and hence predictable to an attacker. Disk interrupts are
- * a better measure, since the timing of the disk interrupts are more
- * unpredictable.
+ * regular, and hence predictable to an attacker. Network Interface
+ * Controller interrupts are a better measure, since the timing of the
+ * NIC interrupts is more unpredictable.
+ *
+ * add_disk_randomness() uses what amounts to the seek time of block
+ * layer request events, on a per-disk_devt basis, as input to the
+ * entropy pool. Note that high-speed solid state drives with very low
+ * seek times do not make for good sources of entropy, as their seek
+ * times are usually fairly consistent.
*
* All of these routines try to estimate how many bits of randomness a
* particular randomness source. They do this by keeping track of the
diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c
index faf5a2c65926..1f46f1cd9225 100644
--- a/drivers/char/tpm/tpm.c
+++ b/drivers/char/tpm/tpm.c
@@ -364,14 +364,12 @@ unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip,
tpm_protected_ordinal_duration[ordinal &
TPM_PROTECTED_ORDINAL_MASK];
- if (duration_idx != TPM_UNDEFINED) {
+ if (duration_idx != TPM_UNDEFINED)
duration = chip->vendor.duration[duration_idx];
- /* if duration is 0, it's because chip->vendor.duration wasn't */
- /* filled yet, so we set the lowest timeout just to give enough */
- /* time for tpm_get_timeouts() to succeed */
- return (duration <= 0 ? HZ : duration);
- } else
+ if (duration <= 0)
return 2 * 60 * HZ;
+ else
+ return duration;
}
EXPORT_SYMBOL_GPL(tpm_calc_ordinal_duration);
@@ -577,11 +575,9 @@ duration:
if (rc)
return;
- if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 ||
- be32_to_cpu(tpm_cmd.header.out.length)
- != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32))
+ if (be32_to_cpu(tpm_cmd.header.out.return_code)
+ != 3 * sizeof(u32))
return;
-
duration_cap = &tpm_cmd.params.getcap_out.cap.duration;
chip->vendor.duration[TPM_SHORT] =
usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short));
@@ -941,18 +937,6 @@ ssize_t tpm_show_caps_1_2(struct device * dev,
}
EXPORT_SYMBOL_GPL(tpm_show_caps_1_2);
-ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct tpm_chip *chip = dev_get_drvdata(dev);
-
- return sprintf(buf, "%d %d %d\n",
- jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]),
- jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]),
- jiffies_to_usecs(chip->vendor.duration[TPM_LONG]));
-}
-EXPORT_SYMBOL_GPL(tpm_show_timeouts);
-
ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index d84ff772c26f..72ddb031b69a 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -56,8 +56,6 @@ extern ssize_t tpm_show_owned(struct device *, struct device_attribute *attr,
char *);
extern ssize_t tpm_show_temp_deactivated(struct device *,
struct device_attribute *attr, char *);
-extern ssize_t tpm_show_timeouts(struct device *,
- struct device_attribute *attr, char *);
struct tpm_chip;
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 0d1d38e5f266..dd21df55689d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -376,7 +376,6 @@ static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
NULL);
static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL);
static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
-static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
static struct attribute *tis_attrs[] = {
&dev_attr_pubek.attr,
@@ -386,8 +385,7 @@ static struct attribute *tis_attrs[] = {
&dev_attr_owned.attr,
&dev_attr_temp_deactivated.attr,
&dev_attr_caps.attr,
- &dev_attr_cancel.attr,
- &dev_attr_timeouts.attr, NULL,
+ &dev_attr_cancel.attr, NULL,
};
static struct attribute_group tis_attr_grp = {
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 490393186338..84b164d1eb2b 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -388,6 +388,10 @@ static void discard_port_data(struct port *port)
unsigned int len;
int ret;
+ if (!port->portdev) {
+ /* Device has been unplugged. vqs are already gone. */
+ return;
+ }
vq = port->in_vq;
if (port->inbuf)
buf = port->inbuf;
@@ -470,6 +474,10 @@ static void reclaim_consumed_buffers(struct port *port)
void *buf;
unsigned int len;
+ if (!port->portdev) {
+ /* Device has been unplugged. vqs are already gone. */
+ return;
+ }
while ((buf = virtqueue_get_buf(port->out_vq, &len))) {
kfree(buf);
port->outvq_full = false;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1109f6848a43..5cb4d09919d6 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1919,8 +1919,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
ret = sysdev_driver_register(&cpu_sysdev_class,
&cpufreq_sysdev_driver);
+ if (ret)
+ goto err_null_driver;
- if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
+ if (!(cpufreq_driver->flags & CPUFREQ_STICKY)) {
int i;
ret = -ENODEV;
@@ -1935,21 +1937,22 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
if (ret) {
dprintk("no CPU initialized for driver %s\n",
driver_data->name);
- sysdev_driver_unregister(&cpu_sysdev_class,
- &cpufreq_sysdev_driver);
-
- spin_lock_irqsave(&cpufreq_driver_lock, flags);
- cpufreq_driver = NULL;
- spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
+ goto err_sysdev_unreg;
}
}
- if (!ret) {
- register_hotcpu_notifier(&cpufreq_cpu_notifier);
- dprintk("driver %s up and running\n", driver_data->name);
- cpufreq_debug_enable_ratelimit();
- }
+ register_hotcpu_notifier(&cpufreq_cpu_notifier);
+ dprintk("driver %s up and running\n", driver_data->name);
+ cpufreq_debug_enable_ratelimit();
+ return 0;
+err_sysdev_unreg:
+ sysdev_driver_unregister(&cpu_sysdev_class,
+ &cpufreq_sysdev_driver);
+err_null_driver:
+ spin_lock_irqsave(&cpufreq_driver_lock, flags);
+ cpufreq_driver = NULL;
+ spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 526bfbf69611..94284c8473b1 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -81,8 +81,6 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
*/
static DEFINE_MUTEX(dbs_mutex);
-static struct workqueue_struct *kconservative_wq;
-
static struct dbs_tuners {
unsigned int sampling_rate;
unsigned int sampling_down_factor;
@@ -560,7 +558,7 @@ static void do_dbs_timer(struct work_struct *work)
dbs_check_cpu(dbs_info);
- queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay);
+ schedule_delayed_work_on(cpu, &dbs_info->work, delay);
mutex_unlock(&dbs_info->timer_mutex);
}
@@ -572,8 +570,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
dbs_info->enable = 1;
INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
- queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work,
- delay);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -716,25 +713,12 @@ struct cpufreq_governor cpufreq_gov_conservative = {
static int __init cpufreq_gov_dbs_init(void)
{
- int err;
-
- kconservative_wq = create_workqueue("kconservative");
- if (!kconservative_wq) {
- printk(KERN_ERR "Creation of kconservative failed\n");
- return -EFAULT;
- }
-
- err = cpufreq_register_governor(&cpufreq_gov_conservative);
- if (err)
- destroy_workqueue(kconservative_wq);
-
- return err;
+ return cpufreq_register_governor(&cpufreq_gov_conservative);
}
static void __exit cpufreq_gov_dbs_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_conservative);
- destroy_workqueue(kconservative_wq);
}
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c631f27a3dcc..58aa85ea5ec6 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -104,8 +104,6 @@ static unsigned int dbs_enable; /* number of CPUs using this policy */
*/
static DEFINE_MUTEX(dbs_mutex);
-static struct workqueue_struct *kondemand_wq;
-
static struct dbs_tuners {
unsigned int sampling_rate;
unsigned int up_threshold;
@@ -667,7 +665,7 @@ static void do_dbs_timer(struct work_struct *work)
__cpufreq_driver_target(dbs_info->cur_policy,
dbs_info->freq_lo, CPUFREQ_RELATION_H);
}
- queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
+ schedule_delayed_work_on(cpu, &dbs_info->work, delay);
mutex_unlock(&dbs_info->timer_mutex);
}
@@ -681,8 +679,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
dbs_info->sample_type = DBS_NORMAL_SAMPLE;
INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
- queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
- delay);
+ schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay);
}
static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -814,7 +811,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
static int __init cpufreq_gov_dbs_init(void)
{
- int err;
cputime64_t wall;
u64 idle_time;
int cpu = get_cpu();
@@ -838,22 +834,12 @@ static int __init cpufreq_gov_dbs_init(void)
MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
}
- kondemand_wq = create_workqueue("kondemand");
- if (!kondemand_wq) {
- printk(KERN_ERR "Creation of kondemand failed\n");
- return -EFAULT;
- }
- err = cpufreq_register_governor(&cpufreq_gov_ondemand);
- if (err)
- destroy_workqueue(kondemand_wq);
-
- return err;
+ return cpufreq_register_governor(&cpufreq_gov_ondemand);
}
static void __exit cpufreq_gov_dbs_exit(void)
{
cpufreq_unregister_governor(&cpufreq_gov_ondemand);
- destroy_workqueue(kondemand_wq);
}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index eab2cf7a0269..e54185223c8c 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -252,4 +252,21 @@ config CRYPTO_DEV_OMAP_AES
OMAP processors have AES module accelerator. Select this if you
want to use the OMAP module for AES algorithms.
+config CRYPTO_DEV_PICOXCELL
+ tristate "Support for picoXcell IPSEC and Layer2 crypto engines"
+ depends on ARCH_PICOXCELL
+ select CRYPTO_AES
+ select CRYPTO_AUTHENC
+ select CRYPTO_ALGAPI
+ select CRYPTO_DES
+ select CRYPTO_CBC
+ select CRYPTO_ECB
+ select CRYPTO_SEQIV
+ help
+ This option enables support for the hardware offload engines in the
+ Picochip picoXcell SoC devices. Select this for IPSEC ESP offload
+ and for 3gpp Layer 2 ciphering support.
+
+ Saying m here will build a module named picoxcell_crypto.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 256697330a41..5203e34248d7 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
-
+obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index add2a1a72ba4..5b970d9e9956 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -839,9 +839,9 @@ static int omap_aes_probe(struct platform_device *pdev)
/* Initializing the clock */
dd->iclk = clk_get(dev, "ick");
- if (!dd->iclk) {
+ if (IS_ERR(dd->iclk)) {
dev_err(dev, "clock intialization failed.\n");
- err = -ENODEV;
+ err = PTR_ERR(dd->iclk);
goto err_res;
}
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 2e71123516e0..465cde3e4f60 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1206,9 +1206,9 @@ static int __devinit omap_sham_probe(struct platform_device *pdev)
/* Initializing the clock */
dd->iclk = clk_get(dev, "ick");
- if (!dd->iclk) {
+ if (IS_ERR(dd->iclk)) {
dev_err(dev, "clock intialization failed.\n");
- err = -ENODEV;
+ err = PTR_ERR(dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
new file mode 100644
index 000000000000..b092d0a65837
--- /dev/null
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -0,0 +1,1867 @@
+/*
+ * Copyright (c) 2010-2011 Picochip Ltd., Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <crypto/aead.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/authenc.h>
+#include <crypto/des.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/rtnetlink.h>
+#include <linux/scatterlist.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+
+#include "picoxcell_crypto_regs.h"
+
+/*
+ * The threshold for the number of entries in the CMD FIFO available before
+ * the CMD0_CNT interrupt is raised. Increasing this value will reduce the
+ * number of interrupts raised to the CPU.
+ */
+#define CMD0_IRQ_THRESHOLD 1
+
+/*
+ * The timeout period (in jiffies) for a PDU. When the number of PDUs in
+ * flight is greater than the STAT_IRQ_THRESHOLD or 0 the timer is disabled.
+ * When there are packets in flight but lower than the threshold, we enable
+ * the timer and at expiry, attempt to remove any processed packets from the
+ * queue and if there are still packets left, schedule the timer again.
+ */
+#define PACKET_TIMEOUT 1
+
+/* The priority to register each algorithm with. */
+#define SPACC_CRYPTO_ALG_PRIORITY 10000
+
+/*
+ * Key length, context page sizes, context counts and FIFO sizes for the
+ * IPSEC and Layer 2 (L2) engines, going by the macro names.
+ * NOTE(review): the units (bytes vs. entries) are not visible here - confirm
+ * against the hardware documentation.
+ */
+#define SPACC_CRYPTO_KASUMI_F8_KEY_LEN 16
+#define SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ 64
+#define SPACC_CRYPTO_IPSEC_HASH_PG_SZ 64
+#define SPACC_CRYPTO_IPSEC_MAX_CTXS 32
+#define SPACC_CRYPTO_IPSEC_FIFO_SZ 32
+#define SPACC_CRYPTO_L2_CIPHER_PG_SZ 64
+#define SPACC_CRYPTO_L2_HASH_PG_SZ 64
+#define SPACC_CRYPTO_L2_MAX_CTXS 128
+#define SPACC_CRYPTO_L2_FIFO_SZ 128
+
+/*
+ * Maximum number of entries in a DDT. spacc_sg_to_ddt() counts the all-zero
+ * terminating entry against this limit.
+ */
+#define MAX_DDT_LEN 16
+
+/*
+ * DDT format. This must match the hardware DDT format exactly.
+ *
+ * A DDT is an array of these entries; the last entry has both fields set to
+ * zero to terminate the table.
+ */
+struct spacc_ddt {
+ dma_addr_t p; /* DMA address of the buffer described by this entry */
+ u32 len; /* length of that buffer */
+};
+
+/*
+ * Asynchronous crypto request structure.
+ *
+ * This structure defines a request that is either queued for processing or
+ * being processed.
+ */
+struct spacc_req {
+ struct list_head list; /* entry in the engine's pending or in_progress list */
+ struct spacc_engine *engine; /* engine the request is issued to */
+ struct crypto_async_request *req; /* the crypto API request being serviced */
+ int result; /* status handed to the crypto API completion callback */
+ bool is_encrypt; /* true to encrypt, false to decrypt */
+ unsigned ctx_id; /* context page index returned by spacc_load_ctx() */
+ dma_addr_t src_addr, dst_addr; /* DMA addresses of the source/destination DDTs */
+ struct spacc_ddt *src_ddt, *dst_ddt; /* CPU pointers to those DDTs */
+ void (*complete)(struct spacc_req *req); /* request type specific completion handler */
+
+ /* AEAD specific bits. */
+ u8 *giv; /* destination for a generated IV, NULL when none is generated */
+ size_t giv_len; /* set to the transform's IV size */
+ dma_addr_t giv_pa; /* DMA address of the mapped IV/GIV */
+};
+
+/* State for one crypto engine instance. */
+struct spacc_engine {
+ void __iomem *regs; /* base of the engine's memory mapped registers */
+ struct list_head pending; /* requests queued while the command FIFO is full */
+ int next_ctx; /* next context page index to hand out; wrapped using fifo_sz */
+ spinlock_t hw_lock; /* taken around queueing and submitting requests */
+ int in_flight; /* incremented for every request submitted to the hardware */
+ struct list_head completed;
+ struct list_head in_progress; /* requests that have been submitted to the hardware */
+ struct tasklet_struct complete;
+ unsigned long fifo_sz;
+ void __iomem *cipher_ctx_base; /* address of cipher context page 0 */
+ void __iomem *hash_key_base; /* address of hash key page 0 */
+ struct spacc_alg *algs;
+ unsigned num_algs;
+ struct list_head registered_algs;
+ size_t cipher_pg_sz; /* stride between cipher context pages */
+ size_t hash_pg_sz; /* stride between hash key pages */
+ const char *name;
+ struct clk *clk;
+ struct device *dev; /* device used for DMA mapping and diagnostics */
+ unsigned max_ctxs;
+ struct timer_list packet_timeout; /* re-armed each time a request is submitted */
+ unsigned stat_irq_thresh;
+ struct dma_pool *req_pool; /* pool the DDTs are allocated from */
+};
+
+/*
+ * Algorithm type mask. Applied to spacc_alg::ctrl_default to extract the
+ * cipher algorithm (e.g. to compare against SPA_CTRL_CIPH_ALG_AES).
+ */
+#define SPACC_CRYPTO_ALG_MASK 0x7
+
+/* SPACC definition of a crypto algorithm. */
+struct spacc_alg {
+ unsigned long ctrl_default; /* base value OR'ed into the control register for each request */
+ unsigned long type; /* copied into the context's generic.flags at init */
+ struct crypto_alg alg; /* embedded crypto API algorithm; see to_spacc_alg() */
+ struct spacc_engine *engine; /* engine that implements the algorithm */
+ struct list_head entry;
+ int key_offs; /* offset of the key within a cipher context page */
+ int iv_offs; /* offset of the IV within a cipher context page */
+};
+
+/*
+ * Generic context structure for any algorithm type. It is the first member
+ * of both the block cipher and the AEAD contexts.
+ */
+struct spacc_generic_ctx {
+ struct spacc_engine *engine; /* engine the transform is bound to */
+ int flags; /* initialised from spacc_alg::type */
+ int key_offs; /* offset of the key within a cipher context page */
+ int iv_offs; /* offset of the IV within a cipher context page */
+};
+
+/* Block cipher context. */
+struct spacc_ablk_ctx {
+ struct spacc_generic_ctx generic;
+ u8 key[AES_MAX_KEY_SIZE]; /* key as supplied to setkey; setkey rejects longer keys */
+ u8 key_len; /* number of valid bytes in key */
+ /*
+ * The fallback cipher. If the operation can't be done in hardware,
+ * fallback to a software version. NULL when no fallback was allocated.
+ */
+ struct crypto_ablkcipher *sw_cipher;
+};
+
+/* AEAD cipher context. */
+struct spacc_aead_ctx {
+ struct spacc_generic_ctx generic;
+ u8 cipher_key[AES_MAX_KEY_SIZE]; /* encryption key extracted by setkey */
+ u8 hash_ctx[SPACC_CRYPTO_IPSEC_HASH_PG_SZ]; /* authentication key extracted by setkey */
+ u8 cipher_key_len; /* number of valid bytes in cipher_key */
+ u8 hash_key_len; /* number of valid bytes in hash_ctx */
+ struct crypto_aead *sw_cipher; /* software fallback, NULL if allocation failed */
+ size_t auth_size; /* ICV length set through setauthsize */
+ u8 salt[AES_BLOCK_SIZE]; /* random bytes chosen at init, copied into the IV by givencrypt */
+};
+
+/* Map a crypto_alg back to the spacc_alg that embeds it; NULL maps to NULL. */
+static inline struct spacc_alg *to_spacc_alg(struct crypto_alg *alg)
+{
+	if (!alg)
+		return NULL;
+
+	return container_of(alg, struct spacc_alg, alg);
+}
+
+/* Returns non-zero when the FIFO status register reports the CMD FIFO full. */
+static inline int spacc_fifo_cmd_full(struct spacc_engine *engine)
+{
+	return readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET) &
+		SPA_FIFO_CMD_FULL;
+}
+
+/*
+ * Compute the address of context page @indx for the engine behind @ctx.
+ *
+ * @is_cipher_ctx selects the cipher context pages when true and the hash
+ * key pages when false. The returned address is where the key/context for
+ * that page may be written.
+ */
+static inline void __iomem *spacc_ctx_page_addr(struct spacc_generic_ctx *ctx,
+						unsigned indx,
+						bool is_cipher_ctx)
+{
+	struct spacc_engine *engine = ctx->engine;
+
+	if (is_cipher_ctx)
+		return engine->cipher_ctx_base + (indx * engine->cipher_pg_sz);
+
+	return engine->hash_key_base + (indx * engine->hash_pg_sz);
+}
+
+/*
+ * Copy @count 32-bit words from @src to the I/O region at @dst. The context
+ * pages can only be written with 32-bit accesses, so each word goes out
+ * through its own writel().
+ */
+static inline void memcpy_toio32(u32 __iomem *dst, const void *src,
+				 unsigned count)
+{
+	const u32 *words = src;
+	unsigned i;
+
+	for (i = 0; i < count; i++)
+		writel(words[i], dst + i);
+}
+
+/*
+ * Write a cipher key and IV into the context page at @page_addr, at the
+ * key/IV offsets recorded in @ctx. Lengths are in bytes and are written as
+ * whole 32-bit words.
+ */
+static void spacc_cipher_write_ctx(struct spacc_generic_ctx *ctx,
+				   void __iomem *page_addr, const u8 *key,
+				   size_t key_len, const u8 *iv, size_t iv_len)
+{
+	memcpy_toio32(page_addr + ctx->key_offs, key, key_len / 4);
+	memcpy_toio32(page_addr + ctx->iv_offs, iv, iv_len / 4);
+}
+
+/*
+ * Load a context into the engine's context memory.
+ *
+ * The next context page index is claimed from the engine (wrapping with the
+ * FIFO size), the cipher key and IV are written to the cipher page and the
+ * key size register is programmed. When @hash_key is non-NULL the hash key
+ * is written and its size programmed as well.
+ *
+ * Returns the index of the context page where the context was loaded.
+ */
+static unsigned spacc_load_ctx(struct spacc_generic_ctx *ctx,
+			       const u8 *ciph_key, size_t ciph_len,
+			       const u8 *iv, size_t ivlen, const u8 *hash_key,
+			       size_t hash_len)
+{
+	struct spacc_engine *engine = ctx->engine;
+	unsigned indx = engine->next_ctx++;
+	void __iomem *ciph_page_addr = spacc_ctx_page_addr(ctx, indx, 1);
+	void __iomem *hash_page_addr = spacc_ctx_page_addr(ctx, indx, 0);
+
+	engine->next_ctx &= engine->fifo_sz - 1;
+
+	spacc_cipher_write_ctx(ctx, ciph_page_addr, ciph_key, ciph_len, iv,
+			       ivlen);
+	writel(ciph_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET) |
+	       (1 << SPA_KEY_SZ_CIPHER_OFFSET),
+	       engine->regs + SPA_KEY_SZ_REG_OFFSET);
+
+	if (!hash_key)
+		return indx;
+
+	memcpy_toio32(hash_page_addr, hash_key, hash_len / 4);
+	writel(hash_len | (indx << SPA_KEY_SZ_CTX_INDEX_OFFSET),
+	       engine->regs + SPA_KEY_SZ_REG_OFFSET);
+
+	return indx;
+}
+
+/*
+ * Count how many scatterlist entries, starting at @sg_list, are needed to
+ * cover @nbytes bytes of data.
+ */
+static int sg_count(struct scatterlist *sg_list, int nbytes)
+{
+	struct scatterlist *sg;
+	int count = 0;
+
+	for (sg = sg_list; nbytes > 0; sg = sg_next(sg)) {
+		nbytes -= sg->length;
+		count++;
+	}
+
+	return count;
+}
+
+/* Fill in one DDT entry with a DMA address and a length. */
+static inline void ddt_set(struct spacc_ddt *ddt, dma_addr_t phys, size_t len)
+{
+	ddt->len = len;
+	ddt->p = phys;
+}
+
+/*
+ * Turn the scatterlist @payload covering @nbytes into a DDT for the crypto
+ * engine. The scatterlist is DMA mapped in direction @dir so that the engine
+ * can DMA to/from it, and the DDT is allocated from the engine's DMA pool
+ * with its DMA address returned through @ddt_phys.
+ *
+ * Returns the DDT, or NULL (with the scatterlist unmapped again) if the
+ * mapping needs more than MAX_DDT_LEN entries including the terminator or
+ * the DDT cannot be allocated.
+ */
+static struct spacc_ddt *spacc_sg_to_ddt(struct spacc_engine *engine,
+					 struct scatterlist *payload,
+					 unsigned nbytes,
+					 enum dma_data_direction dir,
+					 dma_addr_t *ddt_phys)
+{
+	unsigned nents = sg_count(payload, nbytes);
+	unsigned mapped_ents = dma_map_sg(engine->dev, payload, nents, dir);
+	struct spacc_ddt *ddt = NULL;
+	struct scatterlist *cur;
+	int i;
+
+	/* Only allocate when the entries plus the terminator will fit. */
+	if (mapped_ents + 1 <= MAX_DDT_LEN)
+		ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, ddt_phys);
+
+	if (!ddt) {
+		dma_unmap_sg(engine->dev, payload, nents, dir);
+		return NULL;
+	}
+
+	for_each_sg(payload, cur, mapped_ents, i)
+		ddt_set(&ddt[i], sg_dma_address(cur), sg_dma_len(cur));
+	ddt_set(&ddt[mapped_ents], 0, 0);
+
+	return ddt;
+}
+
+/*
+ * Build the source and destination DDTs for an AEAD request.
+ *
+ * Both DDTs are allocated from the engine's DMA pool and recorded in @req.
+ * The associated data, the IV (or @giv when it is non-NULL) and the payload
+ * are DMA mapped and added to the source DDT in that order. The destination
+ * DDT only receives the associated data when encrypting, and the IV when
+ * encrypting or generating an IV. Both tables end with a zero entry.
+ *
+ * Returns 0 on success or -ENOMEM if a DDT could not be allocated.
+ *
+ * NOTE(review): the results of dma_map_sg()/dma_map_single() are not checked
+ * and the number of entries written is not bounded by MAX_DDT_LEN here.
+ */
+static int spacc_aead_make_ddts(struct spacc_req *req, u8 *giv)
+{
+ struct aead_request *areq = container_of(req->req, struct aead_request,
+ base);
+ struct spacc_engine *engine = req->engine;
+ struct spacc_ddt *src_ddt, *dst_ddt;
+ unsigned ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(areq));
+ unsigned nents = sg_count(areq->src, areq->cryptlen);
+ dma_addr_t iv_addr;
+ struct scatterlist *cur;
+ int i, dst_ents, src_ents, assoc_ents;
+ u8 *iv = giv ? giv : areq->iv;
+
+ src_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->src_addr);
+ if (!src_ddt)
+ return -ENOMEM;
+
+ dst_ddt = dma_pool_alloc(engine->req_pool, GFP_ATOMIC, &req->dst_addr);
+ if (!dst_ddt) {
+ dma_pool_free(engine->req_pool, src_ddt, req->src_addr);
+ return -ENOMEM;
+ }
+
+ req->src_ddt = src_ddt;
+ req->dst_ddt = dst_ddt;
+
+ assoc_ents = dma_map_sg(engine->dev, areq->assoc,
+ sg_count(areq->assoc, areq->assoclen), DMA_TO_DEVICE);
+ if (areq->src != areq->dst) {
+ src_ents = dma_map_sg(engine->dev, areq->src, nents,
+ DMA_TO_DEVICE);
+ dst_ents = dma_map_sg(engine->dev, areq->dst, nents,
+ DMA_FROM_DEVICE);
+ } else {
+ src_ents = dma_map_sg(engine->dev, areq->src, nents,
+ DMA_BIDIRECTIONAL);
+ dst_ents = 0;
+ }
+
+ /*
+ * Map the IV/GIV. For the GIV it needs to be bidirectional as it is
+ * formed by the crypto block and sent as the ESP IV for IPSEC.
+ */
+ iv_addr = dma_map_single(engine->dev, iv, ivsize,
+ giv ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+ req->giv_pa = iv_addr;
+
+ /*
+ * Map the associated data. For decryption we don't copy the
+ * associated data.
+ */
+ for_each_sg(areq->assoc, cur, assoc_ents, i) {
+ ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur));
+ if (req->is_encrypt)
+ ddt_set(dst_ddt++, sg_dma_address(cur),
+ sg_dma_len(cur));
+ }
+ ddt_set(src_ddt++, iv_addr, ivsize);
+
+ if (giv || req->is_encrypt)
+ ddt_set(dst_ddt++, iv_addr, ivsize);
+
+ /*
+ * Now map in the payload for the source and destination and terminate
+ * with the NULL pointers.
+ */
+ for_each_sg(areq->src, cur, src_ents, i) {
+ ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur));
+ if (areq->src == areq->dst)
+ ddt_set(dst_ddt++, sg_dma_address(cur),
+ sg_dma_len(cur));
+ }
+
+ for_each_sg(areq->dst, cur, dst_ents, i)
+ ddt_set(dst_ddt++, sg_dma_address(cur),
+ sg_dma_len(cur));
+
+ ddt_set(src_ddt, 0, 0);
+ ddt_set(dst_ddt, 0, 0);
+
+ return 0;
+}
+
+/*
+ * Undo spacc_aead_make_ddts(): unmap the payload, the associated data and
+ * the IV, then return both DDTs to the engine's DMA pool.
+ */
+static void spacc_aead_free_ddts(struct spacc_req *req)
+{
+	struct aead_request *areq = container_of(req->req, struct aead_request,
+						 base);
+	struct spacc_alg *alg = to_spacc_alg(req->req->tfm->__crt_alg);
+	/* AEAD transforms carry a spacc_aead_ctx, not a block cipher ctx. */
+	struct spacc_aead_ctx *aead_ctx = crypto_tfm_ctx(req->req->tfm);
+	struct spacc_engine *engine = aead_ctx->generic.engine;
+	unsigned ivsize = alg->alg.cra_aead.ivsize;
+	unsigned nents = sg_count(areq->src, areq->cryptlen);
+
+	if (areq->src != areq->dst) {
+		dma_unmap_sg(engine->dev, areq->src, nents, DMA_TO_DEVICE);
+		dma_unmap_sg(engine->dev, areq->dst,
+			     sg_count(areq->dst, areq->cryptlen),
+			     DMA_FROM_DEVICE);
+	} else
+		dma_unmap_sg(engine->dev, areq->src, nents, DMA_BIDIRECTIONAL);
+
+	dma_unmap_sg(engine->dev, areq->assoc,
+		     sg_count(areq->assoc, areq->assoclen), DMA_TO_DEVICE);
+
+	/*
+	 * The unmap direction has to match the direction used for the
+	 * mapping: the IV is only mapped bidirectionally when a GIV is being
+	 * generated, otherwise it is mapped to-device.
+	 */
+	dma_unmap_single(engine->dev, req->giv_pa, ivsize,
+			 req->giv ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+
+	dma_pool_free(engine->req_pool, req->src_ddt, req->src_addr);
+	dma_pool_free(engine->req_pool, req->dst_ddt, req->dst_addr);
+}
+
+/*
+ * Release a DDT built by spacc_sg_to_ddt(): unmap the @payload scatterlist
+ * covering @nbytes in direction @dir and return @ddt to the DMA pool.
+ */
+static void spacc_free_ddt(struct spacc_req *req, struct spacc_ddt *ddt,
+			   dma_addr_t ddt_addr, struct scatterlist *payload,
+			   unsigned nbytes, enum dma_data_direction dir)
+{
+	struct spacc_engine *engine = req->engine;
+
+	dma_unmap_sg(engine->dev, payload, sg_count(payload, nbytes), dir);
+	dma_pool_free(engine->req_pool, ddt, ddt_addr);
+}
+
+/*
+ * Store the DES key for the cipher half of an AEAD transform. When the
+ * transform asked for weak key checking, a weak key is rejected with
+ * -EINVAL and CRYPTO_TFM_RES_WEAK_KEY is set.
+ */
+static int spacc_aead_des_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 expanded[DES_EXPKEY_WORDS];
+
+	if (unlikely(!des_ekey(expanded, key)) &&
+	    (crypto_aead_get_flags(aead) & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	ctx->cipher_key_len = len;
+	memcpy(ctx->cipher_key, key, len);
+
+	return 0;
+}
+
+/*
+ * Set the key for the AES block cipher component of the AEAD transform.
+ *
+ * Keys the hardware supports are stored in the context. Any other length is
+ * handed to the software fallback; -EINVAL is returned if no fallback was
+ * allocated. The caller has already bounded @len by AES_MAX_KEY_SIZE.
+ */
+static int spacc_aead_aes_setkey(struct crypto_aead *aead, const u8 *key,
+				 unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	/*
+	 * IPSec engine only supports 128 and 256 bit AES keys. If we get a
+	 * request for any other size (192 bits) then we need to do a software
+	 * fallback.
+	 */
+	if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) {
+		/* The fallback is optional; it may have failed to allocate. */
+		if (!ctx->sw_cipher)
+			return -EINVAL;
+
+		/*
+		 * Set the fallback transform to use the same request flags as
+		 * the hardware transform.
+		 */
+		ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+		ctx->sw_cipher->base.crt_flags |=
+			tfm->crt_flags & CRYPTO_TFM_REQ_MASK;
+		err = crypto_aead_setkey(ctx->sw_cipher, key, len);
+		/*
+		 * Record the length so that spacc_aead_need_fallback() picks
+		 * the fallback rather than a stale hardware key from an
+		 * earlier setkey.
+		 */
+		if (!err)
+			ctx->cipher_key_len = len;
+		return err;
+	}
+
+	memcpy(ctx->cipher_key, key, len);
+	ctx->cipher_key_len = len;
+
+	return 0;
+}
+
+/*
+ * Set the key for an AEAD transform.
+ *
+ * @key is an rtattr of type CRYPTO_AUTHENC_KEYA_PARAM carrying the
+ * encryption key length, followed by the authentication key and then the
+ * encryption key. The encryption key is passed to the AES or DES setkey
+ * helper depending on the algorithm, and the authentication key is stored
+ * in the context.
+ *
+ * Returns 0 on success. On any failure CRYPTO_TFM_RES_BAD_KEY_LEN is set and
+ * -EINVAL is returned.
+ */
+static int spacc_aead_setkey(struct crypto_aead *tfm, const u8 *key,
+			     unsigned int keylen)
+{
+	struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct spacc_alg *alg = to_spacc_alg(tfm->base.__crt_alg);
+	struct rtattr *rta = (void *)key;
+	struct crypto_authenc_key_param *param;
+	unsigned int authkeylen, enckeylen;
+	int err = -EINVAL;
+
+	if (!RTA_OK(rta, keylen))
+		goto badkey;
+
+	if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM)
+		goto badkey;
+
+	if (RTA_PAYLOAD(rta) < sizeof(*param))
+		goto badkey;
+
+	param = RTA_DATA(rta);
+	enckeylen = be32_to_cpu(param->enckeylen);
+
+	key += RTA_ALIGN(rta->rta_len);
+	keylen -= RTA_ALIGN(rta->rta_len);
+
+	if (keylen < enckeylen)
+		goto badkey;
+
+	authkeylen = keylen - enckeylen;
+
+	if (enckeylen > AES_MAX_KEY_SIZE)
+		goto badkey;
+
+	/* The authentication key must fit the context's hash key buffer. */
+	if (authkeylen > sizeof(ctx->hash_ctx))
+		goto badkey;
+
+	if ((alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
+	    SPA_CTRL_CIPH_ALG_AES)
+		err = spacc_aead_aes_setkey(tfm, key + authkeylen, enckeylen);
+	else
+		err = spacc_aead_des_setkey(tfm, key + authkeylen, enckeylen);
+
+	if (err)
+		goto badkey;
+
+	memcpy(ctx->hash_ctx, key, authkeylen);
+	ctx->hash_key_len = authkeylen;
+
+	return 0;
+
+badkey:
+	crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+/* Record the ICV length to use for this transform. Always succeeds. */
+static int spacc_aead_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	struct crypto_tfm *base = crypto_aead_tfm(tfm);
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(base);
+
+	ctx->auth_size = authsize;
+
+	return 0;
+}
+
+/*
+ * Check if an AEAD request requires a fallback operation. Some requests can't
+ * be completed in hardware because the hardware may not support certain key
+ * sizes. In these cases we need to complete the request in software.
+ *
+ * Returns 1 for an AES transform whose key is neither 128 nor 256 bits long,
+ * 0 otherwise.
+ */
+static int spacc_aead_need_fallback(struct spacc_req *req)
+{
+	struct crypto_tfm *tfm = req->req->tfm;
+	struct crypto_alg *alg = req->req->tfm->__crt_alg;
+	struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	/*
+	 * If we have a non-supported key-length, then we need to do a
+	 * software fallback.
+	 */
+	if ((spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) ==
+	    SPA_CTRL_CIPH_ALG_AES &&
+	    ctx->cipher_key_len != AES_KEYSIZE_128 &&
+	    ctx->cipher_key_len != AES_KEYSIZE_256)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Run an AEAD request through the software fallback transform.
+ *
+ * Returns the result of the fallback operation, or -EINVAL when no fallback
+ * transform was allocated.
+ */
+static int spacc_aead_do_fallback(struct aead_request *req, unsigned alg_type,
+				  bool is_encrypt)
+{
+	struct crypto_tfm *old_tfm = crypto_aead_tfm(crypto_aead_reqtfm(req));
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(old_tfm);
+	int err;
+
+	if (!ctx->sw_cipher)
+		return -EINVAL;
+
+	/*
+	 * Point the request at the fallback for the duration of the
+	 * operation, then restore the original transform.
+	 */
+	aead_request_set_tfm(req, ctx->sw_cipher);
+	if (is_encrypt)
+		err = crypto_aead_encrypt(req);
+	else
+		err = crypto_aead_decrypt(req);
+	aead_request_set_tfm(req, __crypto_aead_cast(old_tfm));
+
+	return err;
+}
+
+/* Completion handler for AEAD requests: release DMA state, then notify. */
+static void spacc_aead_complete(struct spacc_req *req)
+{
+	struct crypto_async_request *areq = req->req;
+
+	spacc_aead_free_ddts(req);
+	areq->complete(areq, req->result);
+}
+
+/*
+ * Program the engine for an AEAD request.
+ *
+ * Loads the cipher key, IV and hash key into a context page, writes the DDT
+ * pointers and the processing, associated data and ICV lengths, re-arms the
+ * packet timeout timer and finally writes the control register (presumably
+ * what starts the operation). The callers in this file hold engine->hw_lock.
+ *
+ * Always returns -EINPROGRESS.
+ */
+static int spacc_aead_submit(struct spacc_req *req)
+{
+ struct crypto_tfm *tfm = req->req->tfm;
+ struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_alg *alg = req->req->tfm->__crt_alg;
+ struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+ struct spacc_engine *engine = ctx->generic.engine;
+ u32 ctrl, proc_len, assoc_len;
+ struct aead_request *aead_req =
+ container_of(req->req, struct aead_request, base);
+
+ req->result = -EINPROGRESS;
+ req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->cipher_key,
+ ctx->cipher_key_len, aead_req->iv, alg->cra_aead.ivsize,
+ ctx->hash_ctx, ctx->hash_key_len);
+
+ /* Set the source and destination DDT pointers. */
+ writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET);
+ writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET);
+ writel(0, engine->regs + SPA_OFFSET_REG_OFFSET);
+
+ assoc_len = aead_req->assoclen;
+ proc_len = aead_req->cryptlen + assoc_len;
+
+ /*
+ * If we aren't generating an IV, then we need to include the IV in the
+ * associated data so that it is included in the hash.
+ */
+ if (!req->giv) {
+ assoc_len += crypto_aead_ivsize(crypto_aead_reqtfm(aead_req));
+ proc_len += crypto_aead_ivsize(crypto_aead_reqtfm(aead_req));
+ } else
+ proc_len += req->giv_len;
+
+ /*
+ * If we are decrypting, we need to take the length of the ICV out of
+ * the processing length.
+ */
+ if (!req->is_encrypt)
+ proc_len -= ctx->auth_size;
+
+ writel(proc_len, engine->regs + SPA_PROC_LEN_REG_OFFSET);
+ writel(assoc_len, engine->regs + SPA_AAD_LEN_REG_OFFSET);
+ writel(ctx->auth_size, engine->regs + SPA_ICV_LEN_REG_OFFSET);
+ writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET);
+ writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET);
+
+ ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) |
+ (1 << SPA_CTRL_ICV_APPEND);
+ if (req->is_encrypt)
+ ctrl |= (1 << SPA_CTRL_ENCRYPT_IDX) | (1 << SPA_CTRL_AAD_COPY);
+ else
+ ctrl |= (1 << SPA_CTRL_KEY_EXP);
+
+ mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
+
+ writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET);
+
+ return -EINPROGRESS;
+}
+
+/*
+ * Setup an AEAD request for processing. This will configure the engine, load
+ * the context and then start the packet processing.
+ *
+ * @giv Pointer to destination address for a generated IV. If the
+ * request does not need to generate an IV then this should be set to NULL.
+ *
+ * Returns -EINPROGRESS when the request was submitted or backlogged, -EBUSY
+ * when the command FIFO is full and the request may not be backlogged,
+ * -ENOMEM when the DDTs could not be allocated, or the result of the
+ * software fallback when the hardware cannot handle the key size.
+ */
+static int spacc_aead_setup(struct aead_request *req, u8 *giv,
+			    unsigned alg_type, bool is_encrypt)
+{
+	struct crypto_alg *alg = req->base.tfm->__crt_alg;
+	struct spacc_engine *engine = to_spacc_alg(alg)->engine;
+	struct spacc_req *dev_req = aead_request_ctx(req);
+	int err;
+	unsigned long flags;
+	unsigned ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req));
+
+	dev_req->giv = giv;
+	dev_req->giv_len = ivsize;
+	dev_req->req = &req->base;
+	dev_req->is_encrypt = is_encrypt;
+	dev_req->result = -EBUSY;
+	dev_req->engine = engine;
+	dev_req->complete = spacc_aead_complete;
+
+	if (unlikely(spacc_aead_need_fallback(dev_req)))
+		return spacc_aead_do_fallback(req, alg_type, is_encrypt);
+
+	/*
+	 * Don't submit a request whose DDTs could not be built. On failure
+	 * spacc_aead_make_ddts() has mapped nothing and has already freed
+	 * anything it allocated, so there is nothing to undo here.
+	 */
+	err = spacc_aead_make_ddts(dev_req, dev_req->giv);
+	if (err)
+		goto out;
+
+	err = -EINPROGRESS;
+	spin_lock_irqsave(&engine->hw_lock, flags);
+	if (unlikely(spacc_fifo_cmd_full(engine))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+			err = -EBUSY;
+			spin_unlock_irqrestore(&engine->hw_lock, flags);
+			goto out_free_ddts;
+		}
+		list_add_tail(&dev_req->list, &engine->pending);
+	} else {
+		++engine->in_flight;
+		list_add_tail(&dev_req->list, &engine->in_progress);
+		spacc_aead_submit(dev_req);
+	}
+	spin_unlock_irqrestore(&engine->hw_lock, flags);
+
+	goto out;
+
+out_free_ddts:
+	spacc_aead_free_ddts(dev_req);
+out:
+	return err;
+}
+
+/* AEAD encrypt entry point: no generated IV, encrypt direction. */
+static int spacc_aead_encrypt(struct aead_request *req)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(crypto_aead_reqtfm(req));
+	unsigned alg_type = to_spacc_alg(tfm->__crt_alg)->type;
+
+	return spacc_aead_setup(req, NULL, alg_type, 1);
+}
+
+/*
+ * AEAD encrypt with IV generation. The request IV is seeded from the
+ * context's salt and the generated IV buffer is filled with the big-endian
+ * sequence number, right aligned and zero padded on the left when the IV is
+ * longer than 64 bits. The request is then set up for encryption.
+ */
+static int spacc_aead_givencrypt(struct aead_givcrypt_request *req)
+{
+	struct crypto_aead *tfm = aead_givcrypt_reqtfm(req);
+	struct spacc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct spacc_alg *alg = to_spacc_alg(tfm->base.__crt_alg);
+	size_t ivsize = crypto_aead_ivsize(tfm);
+	unsigned len = ivsize > sizeof(u64) ? sizeof(u64) : ivsize;
+	__be64 seq = cpu_to_be64(req->seq);
+
+	memcpy(req->areq.iv, ctx->salt, ivsize);
+
+	if (ivsize > sizeof(u64))
+		memset(req->giv, 0, ivsize - sizeof(u64));
+	memcpy(req->giv + ivsize - len, &seq, len);
+
+	return spacc_aead_setup(&req->areq, req->giv, alg->type, 1);
+}
+
+/* AEAD decrypt entry point: no generated IV, decrypt direction. */
+static int spacc_aead_decrypt(struct aead_request *req)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(crypto_aead_reqtfm(req));
+	unsigned alg_type = to_spacc_alg(tfm->__crt_alg)->type;
+
+	return spacc_aead_setup(req, NULL, alg_type, 0);
+}
+
+/*
+ * Constructor for an AEAD context. Binds the context to the algorithm's
+ * engine, tries to allocate a software fallback of the same name, picks a
+ * random salt and sets the per-request context size. Failing to allocate
+ * the fallback only produces a warning; the function always returns 0.
+ */
+static int spacc_aead_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+	struct spacc_engine *engine = spacc_alg->engine;
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aead *fallback;
+
+	ctx->generic.flags = spacc_alg->type;
+	ctx->generic.engine = engine;
+
+	fallback = crypto_alloc_aead(alg->cra_name, 0,
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(fallback)) {
+		dev_warn(engine->dev, "failed to allocate fallback for %s\n",
+			 alg->cra_name);
+		fallback = NULL;
+	}
+	ctx->sw_cipher = fallback;
+
+	ctx->generic.key_offs = spacc_alg->key_offs;
+	ctx->generic.iv_offs = spacc_alg->iv_offs;
+
+	get_random_bytes(ctx->salt, sizeof(ctx->salt));
+
+	tfm->crt_aead.reqsize = sizeof(struct spacc_req);
+
+	return 0;
+}
+
+/*
+ * Destructor for an AEAD context: frees the fallback transform, if one was
+ * allocated, and clears the pointer.
+ */
+static void spacc_aead_cra_exit(struct crypto_tfm *tfm)
+{
+	struct spacc_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aead *fallback = ctx->sw_cipher;
+
+	ctx->sw_cipher = NULL;
+	if (fallback)
+		crypto_free_aead(fallback);
+}
+
+/*
+ * Store the DES key for a block cipher transform. Keys longer than
+ * DES3_EDE_KEY_SIZE are rejected with CRYPTO_TFM_RES_BAD_KEY_LEN, and weak
+ * keys are rejected with CRYPTO_TFM_RES_WEAK_KEY when the transform has
+ * requested weak key checking. Both failures return -EINVAL.
+ */
+static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			    unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 expanded[DES_EXPKEY_WORDS];
+
+	if (len > DES3_EDE_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if (unlikely(!des_ekey(expanded, key)) &&
+	    (crypto_ablkcipher_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	ctx->key_len = len;
+	memcpy(ctx->key, key, len);
+
+	return 0;
+}
+
+/*
+ * Set the key for an AES block cipher. Some key lengths are not supported in
+ * hardware so this must also check whether a fallback is needed.
+ *
+ * Keys longer than AES_MAX_KEY_SIZE are rejected with
+ * CRYPTO_TFM_RES_BAD_KEY_LEN. Keys of a length the hardware cannot handle
+ * are also given to the software fallback; without a fallback they are
+ * rejected with -EINVAL and the previous key is left in place.
+ */
+static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
+			    unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err = 0;
+
+	if (len > AES_MAX_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/*
+	 * IPSec engine only supports 128 and 256 bit AES keys. If we get a
+	 * request for any other size (192 bits) then we need to do a software
+	 * fallback.
+	 */
+	if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) {
+		if (!ctx->sw_cipher)
+			return -EINVAL;
+
+		/*
+		 * Set the fallback transform to use the same request flags as
+		 * the hardware transform.
+		 */
+		ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
+		ctx->sw_cipher->base.crt_flags |=
+			cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK;
+
+		err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len);
+		if (err)
+			goto sw_setkey_failed;
+	}
+
+	/*
+	 * The length is recorded even for fallback keys so that
+	 * spacc_ablk_need_fallback() can tell which path a request takes.
+	 */
+	memcpy(ctx->key, key, len);
+	ctx->key_len = len;
+
+sw_setkey_failed:
+	/* Propagate the fallback's result flags to the hardware transform. */
+	if (err && ctx->sw_cipher) {
+		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
+		tfm->crt_flags |=
+			ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK;
+	}
+
+	return err;
+}
+
+/*
+ * Store the key for a Kasumi F8 transform. Keys longer than the context's
+ * key buffer (AES_MAX_KEY_SIZE) are rejected with
+ * CRYPTO_TFM_RES_BAD_KEY_LEN and -EINVAL.
+ */
+static int spacc_kasumi_f8_setkey(struct crypto_ablkcipher *cipher,
+				  const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (len > AES_MAX_KEY_SIZE) {
+		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->key_len = len;
+	memcpy(ctx->key, key, len);
+
+	return 0;
+}
+
+/*
+ * Returns 1 when a block cipher request has to go to the software fallback:
+ * an AES transform whose key is neither 128 nor 256 bits long. Returns 0
+ * otherwise.
+ */
+static int spacc_ablk_need_fallback(struct spacc_req *req)
+{
+	struct crypto_tfm *tfm = req->req->tfm;
+	struct spacc_alg *spacc_alg = to_spacc_alg(tfm->__crt_alg);
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if ((spacc_alg->ctrl_default & SPACC_CRYPTO_ALG_MASK) !=
+	    SPA_CTRL_CIPH_ALG_AES)
+		return 0;
+
+	if (ctx->key_len == AES_KEYSIZE_128 || ctx->key_len == AES_KEYSIZE_256)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Completion handler for block cipher requests: release the DDT(s) and DMA
+ * mappings, then invoke the crypto API completion callback. An in-place
+ * request only has the one bidirectional destination DDT to release.
+ */
+static void spacc_ablk_complete(struct spacc_req *req)
+{
+	struct ablkcipher_request *ablk_req =
+		container_of(req->req, struct ablkcipher_request, base);
+
+	if (ablk_req->src == ablk_req->dst) {
+		spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
+			       ablk_req->nbytes, DMA_BIDIRECTIONAL);
+	} else {
+		spacc_free_ddt(req, req->src_ddt, req->src_addr, ablk_req->src,
+			       ablk_req->nbytes, DMA_TO_DEVICE);
+		spacc_free_ddt(req, req->dst_ddt, req->dst_addr, ablk_req->dst,
+			       ablk_req->nbytes, DMA_FROM_DEVICE);
+	}
+
+	req->req->complete(req->req, req->result);
+}
+
+/*
+ * Program the engine for a block cipher request.
+ *
+ * Loads the key and IV into a context page (no hash key), writes the DDT
+ * pointers and the processing length, re-arms the packet timeout timer and
+ * finally writes the control register (presumably what starts the
+ * operation). The caller in this file holds engine->hw_lock.
+ *
+ * Always returns -EINPROGRESS.
+ */
+static int spacc_ablk_submit(struct spacc_req *req)
+{
+ struct crypto_tfm *tfm = req->req->tfm;
+ struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct ablkcipher_request *ablk_req = ablkcipher_request_cast(req->req);
+ struct crypto_alg *alg = req->req->tfm->__crt_alg;
+ struct spacc_alg *spacc_alg = to_spacc_alg(alg);
+ struct spacc_engine *engine = ctx->generic.engine;
+ u32 ctrl;
+
+ req->ctx_id = spacc_load_ctx(&ctx->generic, ctx->key,
+ ctx->key_len, ablk_req->info, alg->cra_ablkcipher.ivsize,
+ NULL, 0);
+
+ writel(req->src_addr, engine->regs + SPA_SRC_PTR_REG_OFFSET);
+ writel(req->dst_addr, engine->regs + SPA_DST_PTR_REG_OFFSET);
+ writel(0, engine->regs + SPA_OFFSET_REG_OFFSET);
+
+ writel(ablk_req->nbytes, engine->regs + SPA_PROC_LEN_REG_OFFSET);
+ writel(0, engine->regs + SPA_ICV_OFFSET_REG_OFFSET);
+ writel(0, engine->regs + SPA_AUX_INFO_REG_OFFSET);
+ writel(0, engine->regs + SPA_AAD_LEN_REG_OFFSET);
+
+ ctrl = spacc_alg->ctrl_default | (req->ctx_id << SPA_CTRL_CTX_IDX) |
+ (req->is_encrypt ? (1 << SPA_CTRL_ENCRYPT_IDX) :
+ (1 << SPA_CTRL_KEY_EXP));
+
+ mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
+
+ writel(ctrl, engine->regs + SPA_CTRL_REG_OFFSET);
+
+ return -EINPROGRESS;
+}
+
+/*
+ * Run a block cipher request through the software fallback transform.
+ *
+ * Returns the result of the fallback operation, or -EINVAL when the
+ * transform has no fallback.
+ */
+static int spacc_ablk_do_fallback(struct ablkcipher_request *req,
+				  unsigned alg_type, bool is_encrypt)
+{
+	struct crypto_tfm *old_tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm);
+	int err;
+
+	if (!ctx->sw_cipher)
+		return -EINVAL;
+
+	/*
+	 * Point the request at the fallback for the duration of the
+	 * operation, then restore the original transform.
+	 */
+	ablkcipher_request_set_tfm(req, ctx->sw_cipher);
+	if (is_encrypt)
+		err = crypto_ablkcipher_encrypt(req);
+	else
+		err = crypto_ablkcipher_decrypt(req);
+	ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm));
+
+	return err;
+}
+
+/*
+ * Set up a block cipher request and either submit it to the engine or queue
+ * it.
+ *
+ * Requests the hardware cannot handle are passed to the software fallback.
+ * Otherwise the DDTs are built and, under engine->hw_lock, the request is
+ * submitted if the command FIFO has room, or added to the pending list if
+ * the request may be backlogged.
+ *
+ * Returns -EINPROGRESS when the request was submitted or backlogged, -EBUSY
+ * when the FIFO is full and backlogging is not allowed, -ENOMEM when a DDT
+ * could not be created, or the result of the software fallback.
+ */
+static int spacc_ablk_setup(struct ablkcipher_request *req, unsigned alg_type,
+ bool is_encrypt)
+{
+ struct crypto_alg *alg = req->base.tfm->__crt_alg;
+ struct spacc_engine *engine = to_spacc_alg(alg)->engine;
+ struct spacc_req *dev_req = ablkcipher_request_ctx(req);
+ unsigned long flags;
+ int err = -ENOMEM;
+
+ dev_req->req = &req->base;
+ dev_req->is_encrypt = is_encrypt;
+ dev_req->engine = engine;
+ dev_req->complete = spacc_ablk_complete;
+ dev_req->result = -EINPROGRESS;
+
+ if (unlikely(spacc_ablk_need_fallback(dev_req)))
+ return spacc_ablk_do_fallback(req, alg_type, is_encrypt);
+
+ /*
+ * Create the DDT's for the engine. If we share the same source and
+ * destination then we can optimize by reusing the DDT's.
+ */
+ if (req->src != req->dst) {
+ dev_req->src_ddt = spacc_sg_to_ddt(engine, req->src,
+ req->nbytes, DMA_TO_DEVICE, &dev_req->src_addr);
+ if (!dev_req->src_ddt)
+ goto out;
+
+ dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
+ req->nbytes, DMA_FROM_DEVICE, &dev_req->dst_addr);
+ if (!dev_req->dst_ddt)
+ goto out_free_src;
+ } else {
+ dev_req->dst_ddt = spacc_sg_to_ddt(engine, req->dst,
+ req->nbytes, DMA_BIDIRECTIONAL, &dev_req->dst_addr);
+ if (!dev_req->dst_ddt)
+ goto out;
+
+ dev_req->src_ddt = NULL;
+ dev_req->src_addr = dev_req->dst_addr;
+ }
+
+ err = -EINPROGRESS;
+ spin_lock_irqsave(&engine->hw_lock, flags);
+ /*
+ * Check if the engine will accept the operation now. If it won't then
+ * we either stick it on the end of a pending list if we can backlog,
+ * or bailout with an error if not.
+ */
+ if (unlikely(spacc_fifo_cmd_full(engine))) {
+ if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
+ err = -EBUSY;
+ spin_unlock_irqrestore(&engine->hw_lock, flags);
+ goto out_free_ddts;
+ }
+ list_add_tail(&dev_req->list, &engine->pending);
+ } else {
+ ++engine->in_flight;
+ list_add_tail(&dev_req->list, &engine->in_progress);
+ spacc_ablk_submit(dev_req);
+ }
+ spin_unlock_irqrestore(&engine->hw_lock, flags);
+
+ goto out;
+
+out_free_ddts:
+ spacc_free_ddt(dev_req, dev_req->dst_ddt, dev_req->dst_addr, req->dst,
+ req->nbytes, req->src == req->dst ?
+ DMA_BIDIRECTIONAL : DMA_FROM_DEVICE);
+out_free_src:
+ if (req->src != req->dst)
+ spacc_free_ddt(dev_req, dev_req->src_ddt, dev_req->src_addr,
+ req->src, req->nbytes, DMA_TO_DEVICE);
+out:
+ return err;
+}
+
+/*
+ * Transform constructor for the ablkcipher algorithms.
+ *
+ * Copies the per-algorithm settings (type, engine, key/IV offsets) into the
+ * transform context and sets the request context size. For algorithms
+ * flagged CRYPTO_ALG_NEED_FALLBACK a software cipher of the same name is
+ * allocated; failing to get one is only warned about and leaves
+ * ctx->sw_cipher NULL. Always returns 0.
+ */
+static int spacc_ablk_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *base = tfm->__crt_alg;
+	struct spacc_alg *salg = to_spacc_alg(base);
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->generic.engine = salg->engine;
+	ctx->generic.flags = salg->type;
+	ctx->generic.key_offs = salg->key_offs;
+	ctx->generic.iv_offs = salg->iv_offs;
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct spacc_req);
+
+	/* Nothing more to do unless a software fallback is required. */
+	if (!(base->cra_flags & CRYPTO_ALG_NEED_FALLBACK))
+		return 0;
+
+	ctx->sw_cipher = crypto_alloc_ablkcipher(base->cra_name, 0,
+			CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->sw_cipher)) {
+		dev_warn(salg->engine->dev,
+			 "failed to allocate fallback for %s\n",
+			 base->cra_name);
+		ctx->sw_cipher = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Transform destructor: release the software fallback cipher, if one was
+ * allocated, and clear the context's reference to it.
+ */
+static void spacc_ablk_cra_exit(struct crypto_tfm *tfm)
+{
+	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ablkcipher *fallback = ctx->sw_cipher;
+
+	ctx->sw_cipher = NULL;
+	if (fallback)
+		crypto_free_ablkcipher(fallback);
+}
+
+/* ablkcipher .encrypt hook: look up the algorithm type and queue an encryption. */
+static int spacc_ablk_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_tfm *tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+
+	return spacc_ablk_setup(req, to_spacc_alg(tfm->__crt_alg)->type, true);
+}
+
+/* ablkcipher .decrypt hook: look up the algorithm type and queue a decryption. */
+static int spacc_ablk_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_tfm *tfm =
+		crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
+
+	return spacc_ablk_setup(req, to_spacc_alg(tfm->__crt_alg)->type, false);
+}
+
+/*
+ * Returns non-zero when the SPA_FIFO_STAT_EMPTY bit is set in the FIFO status
+ * register, i.e. there are no status entries left to pop.
+ */
+static inline int spacc_fifo_stat_empty(struct spacc_engine *engine)
+{
+	u32 fifo_stat = readl(engine->regs + SPA_FIFO_STAT_REG_OFFSET);
+
+	return fifo_stat & SPA_FIFO_STAT_EMPTY;
+}
+
+/*
+ * Drain the engine's status FIFO.
+ *
+ * For each status entry the oldest request on engine->in_progress is moved to
+ * engine->completed, the status register is popped and read, and the
+ * hardware result code is translated into a negative errno (a zero result is
+ * left as 0). The completion tasklet is then scheduled to run the request
+ * callbacks. Called from the interrupt handler and from the packet timeout
+ * timer; engine->hw_lock is held for the whole operation.
+ */
+static void spacc_process_done(struct spacc_engine *engine)
+{
+ struct spacc_req *req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->hw_lock, flags);
+
+ while (!spacc_fifo_stat_empty(engine)) {
+ /*
+ * NOTE(review): assumes in_progress is never empty while the status
+ * FIFO reports entries; list_first_entry() on an empty list does not
+ * yield a valid request. Confirm the hardware guarantees this.
+ */
+ req = list_first_entry(&engine->in_progress, struct spacc_req,
+ list);
+ list_move_tail(&req->list, &engine->completed);
+
+ /* POP the status register. */
+ writel(~0, engine->regs + SPA_STAT_POP_REG_OFFSET);
+ req->result = (readl(engine->regs + SPA_STATUS_REG_OFFSET) &
+ SPA_STATUS_RES_CODE_MASK) >> SPA_STATUS_RES_CODE_OFFSET;
+
+ /*
+ * Convert the SPAcc error status into the standard POSIX error
+ * codes.
+ */
+ if (unlikely(req->result)) {
+ switch (req->result) {
+ case SPA_STATUS_ICV_FAIL:
+ req->result = -EBADMSG;
+ break;
+
+ case SPA_STATUS_MEMORY_ERROR:
+ dev_warn(engine->dev,
+ "memory error triggered\n");
+ req->result = -EFAULT;
+ break;
+
+ case SPA_STATUS_BLOCK_ERROR:
+ dev_warn(engine->dev,
+ "block error triggered\n");
+ req->result = -EIO;
+ break;
+ }
+ }
+ }
+
+ /* Run the completion callbacks outside of this context. */
+ tasklet_schedule(&engine->complete);
+
+ spin_unlock_irqrestore(&engine->hw_lock, flags);
+}
+
+/*
+ * Interrupt handler: write the interrupt status register back to itself and
+ * then collect the finished requests. @dev is the spacc_engine that was
+ * passed when the IRQ was requested.
+ */
+static irqreturn_t spacc_spacc_irq(int irq, void *dev)
+{
+	struct spacc_engine *engine = dev;
+	u32 pending;
+
+	pending = readl(engine->regs + SPA_IRQ_STAT_REG_OFFSET);
+	writel(pending, engine->regs + SPA_IRQ_STAT_REG_OFFSET);
+
+	spacc_process_done(engine);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Packet timeout timer callback: collect whatever has finished so far.
+ * @data carries the spacc_engine pointer supplied at timer setup.
+ */
+static void spacc_packet_timeout(unsigned long data)
+{
+	spacc_process_done((struct spacc_engine *)data);
+}
+
+/*
+ * Submit a queued request through the AEAD or the ablkcipher path, chosen by
+ * the type bits of the owning algorithm's cra_flags.
+ */
+static int spacc_req_submit(struct spacc_req *req)
+{
+	u32 alg_type = req->req->tfm->__crt_alg->cra_flags &
+		       CRYPTO_ALG_TYPE_MASK;
+
+	if (alg_type != CRYPTO_ALG_TYPE_AEAD)
+		return spacc_ablk_submit(req);
+
+	return spacc_aead_submit(req);
+}
+
+/*
+ * Completion tasklet.
+ *
+ * Takes everything off engine->completed (under engine->hw_lock) and runs
+ * each request's ->complete() callback with the lock dropped. The lock is
+ * then retaken to account for the finished requests, move as many pending
+ * requests as the command FIFO will accept onto in_progress and submit them,
+ * and re-arm the packet timeout timer while anything is still in flight.
+ * @data carries the spacc_engine pointer supplied at tasklet_init() time.
+ */
+static void spacc_spacc_complete(unsigned long data)
+{
+ struct spacc_engine *engine = (struct spacc_engine *)data;
+ struct spacc_req *req, *tmp;
+ unsigned long flags;
+ int num_removed = 0;
+ LIST_HEAD(completed);
+
+ /* Detach the completed list so callbacks run without hw_lock held. */
+ spin_lock_irqsave(&engine->hw_lock, flags);
+ list_splice_init(&engine->completed, &completed);
+ spin_unlock_irqrestore(&engine->hw_lock, flags);
+
+ /* _safe iteration: req is not touched again after its callback runs. */
+ list_for_each_entry_safe(req, tmp, &completed, list) {
+ ++num_removed;
+ req->complete(req);
+ }
+
+ /* Try and fill the engine back up again. */
+ spin_lock_irqsave(&engine->hw_lock, flags);
+
+ engine->in_flight -= num_removed;
+
+ list_for_each_entry_safe(req, tmp, &engine->pending, list) {
+ if (spacc_fifo_cmd_full(engine))
+ break;
+
+ list_move_tail(&req->list, &engine->in_progress);
+ ++engine->in_flight;
+ req->result = spacc_req_submit(req);
+ }
+
+ if (engine->in_flight)
+ mod_timer(&engine->packet_timeout, jiffies + PACKET_TIMEOUT);
+
+ spin_unlock_irqrestore(&engine->hw_lock, flags);
+}
+
+#ifdef CONFIG_PM
+/* Suspend hook: gate the engine clock. Always returns 0. */
+static int spacc_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct spacc_engine *engine = platform_get_drvdata(pdev);
+
+ /*
+ * We only support standby mode. All we have to do is gate the clock to
+ * the spacc. The hardware will preserve state until we turn it back
+ * on again.
+ */
+ clk_disable(engine->clk);
+
+ return 0;
+}
+
+/* Resume hook: re-enable the engine clock and return clk_enable()'s result. */
+static int spacc_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct spacc_engine *engine = platform_get_drvdata(pdev);
+
+ return clk_enable(engine->clk);
+}
+
+/* Power management callbacks shared by the ipsec and l2 platform drivers. */
+static const struct dev_pm_ops spacc_pm_ops = {
+ .suspend = spacc_suspend,
+ .resume = spacc_resume,
+};
+#endif /* CONFIG_PM */
+
+/* Map a struct device to the engine stored as its platform drvdata; NULL-safe. */
+static inline struct spacc_engine *spacc_dev_to_engine(struct device *dev)
+{
+	if (!dev)
+		return NULL;
+
+	return platform_get_drvdata(to_platform_device(dev));
+}
+
+/* sysfs read of stat_irq_thresh: print the current threshold as a decimal. */
+static ssize_t spacc_stat_irq_thresh_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+			spacc_dev_to_engine(dev)->stat_irq_thresh);
+}
+
+/*
+ * sysfs write of stat_irq_thresh.
+ *
+ * Parses an unsigned integer (base auto-detected), clamps it to the range
+ * [1, fifo_sz - 1], records it in the engine and programs it into the IRQ
+ * control register. Returns @len on success or -EINVAL if the input cannot be
+ * parsed.
+ */
+static ssize_t spacc_stat_irq_thresh_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t len)
+{
+	struct spacc_engine *engine = spacc_dev_to_engine(dev);
+	unsigned long thresh;
+	int parse_err = strict_strtoul(buf, 0, &thresh);
+
+	if (parse_err)
+		return -EINVAL;
+
+	engine->stat_irq_thresh = clamp(thresh, 1UL, engine->fifo_sz - 1);
+	writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET,
+	       engine->regs + SPA_IRQ_CTRL_REG_OFFSET);
+
+	return len;
+}
+static DEVICE_ATTR(stat_irq_thresh, 0644, spacc_stat_irq_thresh_show,
+		   spacc_stat_irq_thresh_store);
+
+/*
+ * Algorithms offered by the IPSEC engine.
+ *
+ * Each entry pairs a crypto_alg registration with the engine settings for
+ * that algorithm: .ctrl_default holds the cipher/hash algorithm and mode bits
+ * for the control register, and .key_offs/.iv_offs are copied into the
+ * transform context at init time (presumably the key and IV positions within
+ * the engine's cipher context; confirm against the setkey/submit code).
+ *
+ * All AES entries use key_offs 0 / iv_offs AES_MAX_KEY_SIZE; all DES and 3DES
+ * entries use key_offs DES_BLOCK_SIZE / iv_offs 0 together with
+ * SPA_CTRL_CIPH_ALG_DES.
+ */
+static struct spacc_alg ipsec_engine_algs[] = {
+	{
+		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC,
+		.key_offs = 0,
+		.iv_offs = AES_MAX_KEY_SIZE,
+		.alg = {
+			.cra_name = "cbc(aes)",
+			.cra_driver_name = "cbc-aes-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+				     CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_aes_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = AES_MIN_KEY_SIZE,
+				.max_keysize = AES_MAX_KEY_SIZE,
+				.ivsize = AES_BLOCK_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.key_offs = 0,
+		.iv_offs = AES_MAX_KEY_SIZE,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_ECB,
+		.alg = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_aes_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = AES_MIN_KEY_SIZE,
+				.max_keysize = AES_MAX_KEY_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
+		.alg = {
+			.cra_name = "cbc(des)",
+			.cra_driver_name = "cbc-des-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_des_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = DES_KEY_SIZE,
+				.max_keysize = DES_KEY_SIZE,
+				.ivsize = DES_BLOCK_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
+		.alg = {
+			.cra_name = "ecb(des)",
+			.cra_driver_name = "ecb-des-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_des_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = DES_KEY_SIZE,
+				.max_keysize = DES_KEY_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC,
+		.alg = {
+			.cra_name = "cbc(des3_ede)",
+			.cra_driver_name = "cbc-des3-ede-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_des_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = DES3_EDE_KEY_SIZE,
+				.max_keysize = DES3_EDE_KEY_SIZE,
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_ECB,
+		.alg = {
+			.cra_name = "ecb(des3_ede)",
+			.cra_driver_name = "ecb-des3-ede-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+			.cra_type = &crypto_ablkcipher_type,
+			.cra_module = THIS_MODULE,
+			.cra_ablkcipher = {
+				.setkey = spacc_des_setkey,
+				.encrypt = spacc_ablk_encrypt,
+				.decrypt = spacc_ablk_decrypt,
+				.min_keysize = DES3_EDE_KEY_SIZE,
+				.max_keysize = DES3_EDE_KEY_SIZE,
+			},
+			.cra_init = spacc_ablk_cra_init,
+			.cra_exit = spacc_ablk_cra_exit,
+		},
+	},
+	{
+		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_SHA | SPA_CTRL_HASH_MODE_HMAC,
+		.key_offs = 0,
+		.iv_offs = AES_MAX_KEY_SIZE,
+		.alg = {
+			.cra_name = "authenc(hmac(sha1),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha1-cbc-aes-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+	{
+		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_SHA256 |
+				SPA_CTRL_HASH_MODE_HMAC,
+		.key_offs = 0,
+		.iv_offs = AES_MAX_KEY_SIZE,
+		.alg = {
+			.cra_name = "authenc(hmac(sha256),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-sha256-cbc-aes-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+	{
+		.key_offs = 0,
+		.iv_offs = AES_MAX_KEY_SIZE,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_AES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_MD5 | SPA_CTRL_HASH_MODE_HMAC,
+		.alg = {
+			.cra_name = "authenc(hmac(md5),cbc(aes))",
+			.cra_driver_name = "authenc-hmac-md5-cbc-aes-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = AES_BLOCK_SIZE,
+				.maxauthsize = MD5_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_SHA | SPA_CTRL_HASH_MODE_HMAC,
+		.alg = {
+			.cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha1-cbc-3des-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA1_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		/*
+		 * 3DES, like the sha1 and md5 variants: the cipher algorithm
+		 * must be DES to match the DES key/IV layout and block size
+		 * used by this entry.
+		 */
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_SHA256 |
+				SPA_CTRL_HASH_MODE_HMAC,
+		.alg = {
+			.cra_name = "authenc(hmac(sha256),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-sha256-cbc-3des-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = SHA256_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+	{
+		.key_offs = DES_BLOCK_SIZE,
+		.iv_offs = 0,
+		.ctrl_default = SPA_CTRL_CIPH_ALG_DES | SPA_CTRL_CIPH_MODE_CBC |
+				SPA_CTRL_HASH_ALG_MD5 | SPA_CTRL_HASH_MODE_HMAC,
+		.alg = {
+			.cra_name = "authenc(hmac(md5),cbc(des3_ede))",
+			.cra_driver_name = "authenc-hmac-md5-cbc-3des-picoxcell",
+			.cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+			.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct spacc_aead_ctx),
+			.cra_type = &crypto_aead_type,
+			.cra_module = THIS_MODULE,
+			.cra_aead = {
+				.setkey = spacc_aead_setkey,
+				.setauthsize = spacc_aead_setauthsize,
+				.encrypt = spacc_aead_encrypt,
+				.decrypt = spacc_aead_decrypt,
+				.givencrypt = spacc_aead_givencrypt,
+				.ivsize = DES3_EDE_BLOCK_SIZE,
+				.maxauthsize = MD5_DIGEST_SIZE,
+			},
+			.cra_init = spacc_aead_cra_init,
+			.cra_exit = spacc_aead_cra_exit,
+		},
+	},
+};
+
+/*
+ * Algorithms offered by the L2 engine: only KASUMI in F8 mode.
+ *
+ * NOTE(review): cra_flags uses CRYPTO_ALG_TYPE_GIVCIPHER while cra_type is
+ * crypto_ablkcipher_type and the other ablkcipher entries in this file use
+ * CRYPTO_ALG_TYPE_ABLKCIPHER; confirm the GIVCIPHER type is intended.
+ */
+static struct spacc_alg l2_engine_algs[] = {
+ {
+ .key_offs = 0,
+ .iv_offs = SPACC_CRYPTO_KASUMI_F8_KEY_LEN,
+ .ctrl_default = SPA_CTRL_CIPH_ALG_KASUMI |
+ SPA_CTRL_CIPH_MODE_F8,
+ .alg = {
+ .cra_name = "f8(kasumi)",
+ .cra_driver_name = "f8-kasumi-picoxcell",
+ .cra_priority = SPACC_CRYPTO_ALG_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 8,
+ .cra_ctxsize = sizeof(struct spacc_ablk_ctx),
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_ablkcipher = {
+ .setkey = spacc_kasumi_f8_setkey,
+ .encrypt = spacc_ablk_encrypt,
+ .decrypt = spacc_ablk_decrypt,
+ .min_keysize = 16,
+ .max_keysize = 16,
+ .ivsize = 8,
+ },
+ .cra_init = spacc_ablk_cra_init,
+ .cra_exit = spacc_ablk_cra_exit,
+ },
+ },
+};
+
+/*
+ * Common probe routine shared by the ipsec and l2 platform drivers.
+ *
+ * Allocates the engine, maps its registers, requests its interrupt, creates
+ * the DDT DMA pool, enables the clock, creates the stat_irq_thresh sysfs
+ * attribute, programs the interrupt configuration and registers every
+ * algorithm in @algs.
+ *
+ * @max_ctxs, @cipher_pg_sz, @hash_pg_sz and @fifo_sz describe the engine and
+ * are stored in it unchanged; @algs/@num_algs is the algorithm table.
+ *
+ * Returns 0 if at least one algorithm was registered. Otherwise returns a
+ * negative errno, and everything that is not devm-managed has been released.
+ */
+static int __devinit spacc_probe(struct platform_device *pdev,
+				 unsigned max_ctxs, size_t cipher_pg_sz,
+				 size_t hash_pg_sz, size_t fifo_sz,
+				 struct spacc_alg *algs, size_t num_algs)
+{
+	int i, err, ret = -EINVAL;
+	struct resource *mem, *irq;
+	struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine),
+						   GFP_KERNEL);
+	if (!engine)
+		return -ENOMEM;
+
+	engine->dev = &pdev->dev;
+	engine->max_ctxs = max_ctxs;
+	engine->cipher_pg_sz = cipher_pg_sz;
+	engine->hash_pg_sz = hash_pg_sz;
+	engine->fifo_sz = fifo_sz;
+	engine->algs = algs;
+	engine->num_algs = num_algs;
+	engine->name = dev_name(&pdev->dev);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!mem || !irq) {
+		dev_err(&pdev->dev, "no memory/irq resource for engine\n");
+		return -ENXIO;
+	}
+
+	if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem),
+				     engine->name))
+		return -ENOMEM;
+
+	engine->regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+	if (!engine->regs) {
+		dev_err(&pdev->dev, "memory map failed\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Everything the interrupt handler, the tasklet and the timer touch
+	 * must be valid before the interrupt can be delivered.
+	 */
+	spin_lock_init(&engine->hw_lock);
+	INIT_LIST_HEAD(&engine->pending);
+	INIT_LIST_HEAD(&engine->completed);
+	INIT_LIST_HEAD(&engine->in_progress);
+	engine->in_flight = 0;
+	tasklet_init(&engine->complete, spacc_spacc_complete,
+		     (unsigned long)engine);
+	setup_timer(&engine->packet_timeout, spacc_packet_timeout,
+		    (unsigned long)engine);
+
+	if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0,
+			     engine->name, engine)) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		return -EBUSY;
+	}
+
+	engine->cipher_ctx_base = engine->regs + SPA_CIPH_KEY_BASE_REG_OFFSET;
+	engine->hash_key_base = engine->regs + SPA_HASH_KEY_BASE_REG_OFFSET;
+
+	engine->req_pool = dmam_pool_create(engine->name, engine->dev,
+		MAX_DDT_LEN * sizeof(struct spacc_ddt), 8, SZ_64K);
+	if (!engine->req_pool)
+		return -ENOMEM;
+
+	engine->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(engine->clk)) {
+		/* The sysfs attribute has not been created yet. */
+		dev_info(&pdev->dev, "clk unavailable\n");
+		return PTR_ERR(engine->clk);
+	}
+
+	if (clk_enable(engine->clk)) {
+		dev_info(&pdev->dev, "unable to enable clk\n");
+		clk_put(engine->clk);
+		return -EIO;
+	}
+
+	err = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh);
+	if (err) {
+		clk_disable(engine->clk);
+		clk_put(engine->clk);
+		return err;
+	}
+
+	/*
+	 * Use an IRQ threshold of 50% as a default. This seems to be a
+	 * reasonable trade off of latency against throughput but can be
+	 * changed at runtime.
+	 */
+	engine->stat_irq_thresh = (engine->fifo_sz / 2);
+
+	/*
+	 * Configure the interrupts. We only use the STAT_CNT interrupt as we
+	 * only submit a new packet for processing when we complete another in
+	 * the queue. This minimizes time spent in the interrupt handler.
+	 */
+	writel(engine->stat_irq_thresh << SPA_IRQ_CTRL_STAT_CNT_OFFSET,
+	       engine->regs + SPA_IRQ_CTRL_REG_OFFSET);
+	writel(SPA_IRQ_EN_STAT_EN | SPA_IRQ_EN_GLBL_EN,
+	       engine->regs + SPA_IRQ_EN_REG_OFFSET);
+
+	platform_set_drvdata(pdev, engine);
+
+	INIT_LIST_HEAD(&engine->registered_algs);
+	for (i = 0; i < engine->num_algs; ++i) {
+		engine->algs[i].engine = engine;
+		err = crypto_register_alg(&engine->algs[i].alg);
+		if (err) {
+			dev_err(engine->dev, "failed to register alg \"%s\"\n",
+				engine->algs[i].alg.cra_name);
+			continue;
+		}
+
+		list_add_tail(&engine->algs[i].entry,
+			      &engine->registered_algs);
+		ret = 0;
+		dev_dbg(engine->dev, "registered alg \"%s\"\n",
+			engine->algs[i].alg.cra_name);
+	}
+
+	/*
+	 * No algorithm could be registered: the probe fails, so .remove will
+	 * never run. Undo what devm does not undo for us.
+	 */
+	if (ret) {
+		device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
+		clk_disable(engine->clk);
+		clk_put(engine->clk);
+	}
+
+	return ret;
+}
+
+/*
+ * Platform .remove callback shared by both drivers: stops the packet timeout
+ * timer, removes the stat_irq_thresh sysfs attribute, unregisters every
+ * algorithm on engine->registered_algs and releases the clock. Always
+ * returns 0.
+ *
+ * NOTE(review): the completion tasklet is not killed here and the algorithms
+ * are unregistered after the timer is stopped; confirm nothing can still be
+ * in flight at this point.
+ */
+static int __devexit spacc_remove(struct platform_device *pdev)
+{
+ struct spacc_alg *alg, *next;
+ struct spacc_engine *engine = platform_get_drvdata(pdev);
+
+ del_timer_sync(&engine->packet_timeout);
+ device_remove_file(&pdev->dev, &dev_attr_stat_irq_thresh);
+
+ list_for_each_entry_safe(alg, next, &engine->registered_algs, entry) {
+ list_del(&alg->entry);
+ crypto_unregister_alg(&alg->alg);
+ }
+
+ clk_disable(engine->clk);
+ clk_put(engine->clk);
+
+ return 0;
+}
+
+/*
+ * Probe for the "picoxcell-ipsec" device: run the common probe with the IPSEC
+ * engine's sizes and algorithm table.
+ */
+static int __devinit ipsec_probe(struct platform_device *pdev)
+{
+	struct spacc_alg *algs = ipsec_engine_algs;
+	size_t num_algs = ARRAY_SIZE(ipsec_engine_algs);
+
+	return spacc_probe(pdev,
+			   SPACC_CRYPTO_IPSEC_MAX_CTXS,
+			   SPACC_CRYPTO_IPSEC_CIPHER_PG_SZ,
+			   SPACC_CRYPTO_IPSEC_HASH_PG_SZ,
+			   SPACC_CRYPTO_IPSEC_FIFO_SZ,
+			   algs, num_algs);
+}
+
+/*
+ * Platform driver for the IPSEC engine; binds to devices named
+ * "picoxcell-ipsec". PM callbacks are only wired up when CONFIG_PM is set.
+ */
+static struct platform_driver ipsec_driver = {
+ .probe = ipsec_probe,
+ .remove = __devexit_p(spacc_remove),
+ .driver = {
+ .name = "picoxcell-ipsec",
+#ifdef CONFIG_PM
+ .pm = &spacc_pm_ops,
+#endif /* CONFIG_PM */
+ },
+};
+
+/*
+ * Probe for the "picoxcell-l2" device: run the common probe with the L2
+ * engine's sizes and algorithm table.
+ */
+static int __devinit l2_probe(struct platform_device *pdev)
+{
+	struct spacc_alg *algs = l2_engine_algs;
+	size_t num_algs = ARRAY_SIZE(l2_engine_algs);
+
+	return spacc_probe(pdev,
+			   SPACC_CRYPTO_L2_MAX_CTXS,
+			   SPACC_CRYPTO_L2_CIPHER_PG_SZ,
+			   SPACC_CRYPTO_L2_HASH_PG_SZ,
+			   SPACC_CRYPTO_L2_FIFO_SZ,
+			   algs, num_algs);
+}
+
+/*
+ * Platform driver for the L2 engine; binds to devices named "picoxcell-l2".
+ * PM callbacks are only wired up when CONFIG_PM is set.
+ */
+static struct platform_driver l2_driver = {
+ .probe = l2_probe,
+ .remove = __devexit_p(spacc_remove),
+ .driver = {
+ .name = "picoxcell-l2",
+#ifdef CONFIG_PM
+ .pm = &spacc_pm_ops,
+#endif /* CONFIG_PM */
+ },
+};
+
+/*
+ * Module init: register the ipsec and l2 platform drivers.
+ *
+ * If the l2 driver cannot be registered, the ipsec driver is unregistered
+ * again so that the module is either fully loaded or not at all. Returns 0
+ * on success or the error from platform_driver_register().
+ */
+static int __init spacc_init(void)
+{
+	int ret = platform_driver_register(&ipsec_driver);
+	if (ret) {
+		/* Log lines must be newline terminated. */
+		pr_err("failed to register ipsec spacc driver\n");
+		goto out;
+	}
+
+	ret = platform_driver_register(&l2_driver);
+	if (ret) {
+		pr_err("failed to register l2 spacc driver\n");
+		goto l2_failed;
+	}
+
+	return 0;
+
+l2_failed:
+	platform_driver_unregister(&ipsec_driver);
+out:
+	return ret;
+}
+module_init(spacc_init);
+
+/* Module exit: unregister both platform drivers registered by spacc_init(). */
+static void __exit spacc_exit(void)
+{
+ platform_driver_unregister(&ipsec_driver);
+ platform_driver_unregister(&l2_driver);
+}
+module_exit(spacc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jamie Iles");
diff --git a/drivers/crypto/picoxcell_crypto_regs.h b/drivers/crypto/picoxcell_crypto_regs.h
new file mode 100644
index 000000000000..af93442564c9
--- /dev/null
+++ b/drivers/crypto/picoxcell_crypto_regs.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2010 Picochip Ltd., Jamie Iles
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#ifndef __PICOXCELL_CRYPTO_REGS_H__
+#define __PICOXCELL_CRYPTO_REGS_H__
+
+/* Result codes found in the RES_CODE field of the status register. */
+#define SPA_STATUS_OK 0
+#define SPA_STATUS_ICV_FAIL 1
+#define SPA_STATUS_MEMORY_ERROR 2
+#define SPA_STATUS_BLOCK_ERROR 3
+
+/* Bit positions (_OFFSET) and masks (_MASK) of fields within the registers. */
+#define SPA_IRQ_CTRL_STAT_CNT_OFFSET 16
+#define SPA_IRQ_STAT_STAT_MASK (1 << 4)
+#define SPA_FIFO_STAT_STAT_OFFSET 16
+#define SPA_FIFO_STAT_STAT_CNT_MASK (0x3F << SPA_FIFO_STAT_STAT_OFFSET)
+#define SPA_STATUS_RES_CODE_OFFSET 24
+#define SPA_STATUS_RES_CODE_MASK (0x3 << SPA_STATUS_RES_CODE_OFFSET)
+#define SPA_KEY_SZ_CTX_INDEX_OFFSET 8
+#define SPA_KEY_SZ_CIPHER_OFFSET 31
+
+/* Byte offsets of the registers and key areas from the engine's register base. */
+#define SPA_IRQ_EN_REG_OFFSET 0x00000000
+#define SPA_IRQ_STAT_REG_OFFSET 0x00000004
+#define SPA_IRQ_CTRL_REG_OFFSET 0x00000008
+#define SPA_FIFO_STAT_REG_OFFSET 0x0000000C
+#define SPA_SDMA_BRST_SZ_REG_OFFSET 0x00000010
+#define SPA_SRC_PTR_REG_OFFSET 0x00000020
+#define SPA_DST_PTR_REG_OFFSET 0x00000024
+#define SPA_OFFSET_REG_OFFSET 0x00000028
+#define SPA_AAD_LEN_REG_OFFSET 0x0000002C
+#define SPA_PROC_LEN_REG_OFFSET 0x00000030
+#define SPA_ICV_LEN_REG_OFFSET 0x00000034
+#define SPA_ICV_OFFSET_REG_OFFSET 0x00000038
+#define SPA_SW_CTRL_REG_OFFSET 0x0000003C
+#define SPA_CTRL_REG_OFFSET 0x00000040
+#define SPA_AUX_INFO_REG_OFFSET 0x0000004C
+#define SPA_STAT_POP_REG_OFFSET 0x00000050
+#define SPA_STATUS_REG_OFFSET 0x00000054
+#define SPA_KEY_SZ_REG_OFFSET 0x00000100
+#define SPA_CIPH_KEY_BASE_REG_OFFSET 0x00004000
+#define SPA_HASH_KEY_BASE_REG_OFFSET 0x00008000
+#define SPA_RC4_CTX_BASE_REG_OFFSET 0x00020000
+
+/* Presumably the reset values of the registers (all zero); TODO confirm. */
+#define SPA_IRQ_EN_REG_RESET 0x00000000
+#define SPA_IRQ_CTRL_REG_RESET 0x00000000
+#define SPA_FIFO_STAT_REG_RESET 0x00000000
+#define SPA_SDMA_BRST_SZ_REG_RESET 0x00000000
+#define SPA_SRC_PTR_REG_RESET 0x00000000
+#define SPA_DST_PTR_REG_RESET 0x00000000
+#define SPA_OFFSET_REG_RESET 0x00000000
+#define SPA_AAD_LEN_REG_RESET 0x00000000
+#define SPA_PROC_LEN_REG_RESET 0x00000000
+#define SPA_ICV_LEN_REG_RESET 0x00000000
+#define SPA_ICV_OFFSET_REG_RESET 0x00000000
+#define SPA_SW_CTRL_REG_RESET 0x00000000
+#define SPA_CTRL_REG_RESET 0x00000000
+#define SPA_AUX_INFO_REG_RESET 0x00000000
+#define SPA_STAT_POP_REG_RESET 0x00000000
+#define SPA_STATUS_REG_RESET 0x00000000
+#define SPA_KEY_SZ_REG_RESET 0x00000000
+
+/*
+ * Bit positions within the control word. The *_IDX values are the shifts used
+ * by the SPA_CTRL_HASH_ALG_*, SPA_CTRL_CIPH_MODE_* and SPA_CTRL_HASH_MODE_*
+ * definitions in this header.
+ */
+#define SPA_CTRL_HASH_ALG_IDX 4
+#define SPA_CTRL_CIPH_MODE_IDX 8
+#define SPA_CTRL_HASH_MODE_IDX 12
+#define SPA_CTRL_CTX_IDX 16
+#define SPA_CTRL_ENCRYPT_IDX 24
+#define SPA_CTRL_AAD_COPY 25
+#define SPA_CTRL_ICV_PT 26
+#define SPA_CTRL_ICV_ENC 27
+#define SPA_CTRL_ICV_APPEND 28
+#define SPA_CTRL_KEY_EXP 29
+
+/*
+ * Same values as SPA_KEY_SZ_CTX_INDEX_OFFSET (8) and SPA_KEY_SZ_CIPHER_OFFSET
+ * (31) defined earlier in this header.
+ */
+#define SPA_KEY_SZ_CXT_IDX 8
+#define SPA_KEY_SZ_CIPHER_IDX 31
+
+/* Bits of the interrupt enable register (SPA_IRQ_EN_REG_OFFSET). */
+#define SPA_IRQ_EN_CMD0_EN (1 << 0)
+#define SPA_IRQ_EN_STAT_EN (1 << 4)
+/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour. */
+#define SPA_IRQ_EN_GLBL_EN (1U << 31)
+
+/* Cipher algorithm selectors; unshifted, so they sit at bit 0 of the control word. */
+#define SPA_CTRL_CIPH_ALG_NULL 0x00
+#define SPA_CTRL_CIPH_ALG_DES 0x01
+#define SPA_CTRL_CIPH_ALG_AES 0x02
+#define SPA_CTRL_CIPH_ALG_RC4 0x03
+#define SPA_CTRL_CIPH_ALG_MULTI2 0x04
+#define SPA_CTRL_CIPH_ALG_KASUMI 0x05
+
+/* Hash algorithm selectors, pre-shifted to SPA_CTRL_HASH_ALG_IDX. */
+#define SPA_CTRL_HASH_ALG_NULL (0x00 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_MD5 (0x01 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_SHA (0x02 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_SHA224 (0x03 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_SHA256 (0x04 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_SHA384 (0x05 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_SHA512 (0x06 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_AESMAC (0x07 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_AESCMAC (0x08 << SPA_CTRL_HASH_ALG_IDX)
+#define SPA_CTRL_HASH_ALG_KASF9 (0x09 << SPA_CTRL_HASH_ALG_IDX)
+
+/*
+ * Cipher mode selectors, pre-shifted to SPA_CTRL_CIPH_MODE_IDX. NULL and ECB
+ * share the value 0.
+ */
+#define SPA_CTRL_CIPH_MODE_NULL (0x00 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_ECB (0x00 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_CBC (0x01 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_CTR (0x02 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_CCM (0x03 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_GCM (0x05 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_OFB (0x07 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_CFB (0x08 << SPA_CTRL_CIPH_MODE_IDX)
+#define SPA_CTRL_CIPH_MODE_F8 (0x09 << SPA_CTRL_CIPH_MODE_IDX)
+
+/* Hash mode selectors, pre-shifted to SPA_CTRL_HASH_MODE_IDX. */
+#define SPA_CTRL_HASH_MODE_RAW (0x00 << SPA_CTRL_HASH_MODE_IDX)
+#define SPA_CTRL_HASH_MODE_SSLMAC (0x01 << SPA_CTRL_HASH_MODE_IDX)
+#define SPA_CTRL_HASH_MODE_HMAC (0x02 << SPA_CTRL_HASH_MODE_IDX)
+
+/*
+ * Flags in the FIFO status register (SPA_FIFO_STAT_REG_OFFSET). The EMPTY
+ * flag is an unsigned constant because shifting a signed 1 into bit 31 is
+ * undefined behaviour.
+ */
+#define SPA_FIFO_STAT_EMPTY (1U << 31)
+#define SPA_FIFO_CMD_FULL (1 << 7)
+
+#endif /* __PICOXCELL_CRYPTO_REGS_H__ */
diff --git a/drivers/gpio/ml_ioh_gpio.c b/drivers/gpio/ml_ioh_gpio.c
index cead8e6ff345..7f6f01a4b145 100644
--- a/drivers/gpio/ml_ioh_gpio.c
+++ b/drivers/gpio/ml_ioh_gpio.c
@@ -326,6 +326,7 @@ static DEFINE_PCI_DEVICE_TABLE(ioh_gpio_pcidev_id) = {
{ PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x802E) },
{ 0, }
};
+MODULE_DEVICE_TABLE(pci, ioh_gpio_pcidev_id);
static struct pci_driver ioh_gpio_driver = {
.name = "ml_ioh_gpio",
diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c
index 0eba0a75c804..2c6af8705103 100644
--- a/drivers/gpio/pch_gpio.c
+++ b/drivers/gpio/pch_gpio.c
@@ -286,6 +286,7 @@ static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
{ 0, }
};
+MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id);
static struct pci_driver pch_gpio_driver = {
.name = "pch_gpio",
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 6977a1ce9d98..f73ef4390db6 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -672,7 +672,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
struct drm_crtc_helper_funcs *crtc_funcs;
u16 *red, *green, *blue, *transp;
struct drm_crtc *crtc;
- int i, rc = 0;
+ int i, j, rc = 0;
int start;
for (i = 0; i < fb_helper->crtc_count; i++) {
@@ -685,7 +685,7 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info)
transp = cmap->transp;
start = cmap->start;
- for (i = 0; i < cmap->len; i++) {
+ for (j = 0; j < cmap->len; j++) {
u16 hred, hgreen, hblue, htransp = 0xffff;
hred = *red++;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 3dadfa2a8528..28d1d3c24d65 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -164,8 +164,10 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
* available. In that case we can't account for this and just
* hope for the best.
*/
- if ((vblrc > 0) && (abs(diff_ns) > 1000000))
+ if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) {
atomic_inc(&dev->_vblank_count[crtc]);
+ smp_mb__after_atomic_inc();
+ }
/* Invalidate all timestamps while vblank irq's are off. */
clear_vblank_timestamps(dev, crtc);
@@ -491,6 +493,12 @@ void drm_calc_timestamping_constants(struct drm_crtc *crtc)
/* Dot clock in Hz: */
dotclock = (u64) crtc->hwmode.clock * 1000;
+ /* Fields of interlaced scanout modes are only half a frame duration.
+ * Double the dotclock to get half the frame-/line-/pixelduration.
+ */
+ if (crtc->hwmode.flags & DRM_MODE_FLAG_INTERLACE)
+ dotclock *= 2;
+
/* Valid dotclock? */
if (dotclock > 0) {
/* Convert scanline length in pixels and video dot clock to
@@ -603,14 +611,6 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
return -EAGAIN;
}
- /* Don't know yet how to handle interlaced or
- * double scan modes. Just no-op for now.
- */
- if (mode->flags & (DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLSCAN)) {
- DRM_DEBUG("crtc %d: Noop due to unsupported mode.\n", crtc);
- return -ENOTSUPP;
- }
-
/* Get current scanout position with system timestamp.
* Repeat query up to DRM_TIMESTAMP_MAXRETRIES times
* if single query takes longer than max_error nanoseconds.
@@ -858,10 +858,11 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
if (rc) {
tslot = atomic_read(&dev->_vblank_count[crtc]) + diff;
vblanktimestamp(dev, crtc, tslot) = t_vblank;
- smp_wmb();
}
+ smp_mb__before_atomic_inc();
atomic_add(diff, &dev->_vblank_count[crtc]);
+ smp_mb__after_atomic_inc();
}
/**
@@ -1011,7 +1012,8 @@ int drm_modeset_ctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
struct drm_modeset_ctl *modeset = data;
- int crtc, ret = 0;
+ int ret = 0;
+ unsigned int crtc;
/* If drm_vblank_init() hasn't been called yet, just no-op */
if (!dev->num_crtcs)
@@ -1293,15 +1295,16 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
* e.g., due to spurious vblank interrupts. We need to
* ignore those for accounting.
*/
- if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) {
+ if (abs64(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) {
/* Store new timestamp in ringbuffer. */
vblanktimestamp(dev, crtc, vblcount + 1) = tvblank;
- smp_wmb();
/* Increment cooked vblank count. This also atomically commits
* the timestamp computed above.
*/
+ smp_mb__before_atomic_inc();
atomic_inc(&dev->_vblank_count[crtc]);
+ smp_mb__after_atomic_inc();
} else {
DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
crtc, (int) diff_ns);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3601466c5502..4ff9b6cc973f 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -865,7 +865,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
int max_freq;
/* RPSTAT1 is in the GT power well */
- __gen6_force_wake_get(dev_priv);
+ __gen6_gt_force_wake_get(dev_priv);
seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
seq_printf(m, "RPSTAT1: 0x%08x\n", I915_READ(GEN6_RPSTAT1));
@@ -888,7 +888,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
max_freq * 100);
- __gen6_force_wake_put(dev_priv);
+ __gen6_gt_force_wake_put(dev_priv);
} else {
seq_printf(m, "no P-state info available\n");
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 17bd766f2081..e33d9be7df3b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1895,6 +1895,17 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
if (IS_GEN2(dev))
dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30));
+ /* 965GM sometimes incorrectly writes to hardware status page (HWS)
+ * using 32bit addressing, overwriting memory if HWS is located
+ * above 4GB.
+ *
+ * The documentation also mentions an issue with undefined
+ * behaviour if any general state is accessed within a page above 4GB,
+ * which also needs to be handled carefully.
+ */
+ if (IS_BROADWATER(dev) || IS_CRESTLINE(dev))
+ dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(32));
+
mmio_bar = IS_GEN2(dev) ? 1 : 0;
dev_priv->regs = pci_iomap(dev->pdev, mmio_bar, 0);
if (!dev_priv->regs) {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 0ad533f06af9..22ec066adae6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -46,6 +46,9 @@ module_param_named(fbpercrtc, i915_fbpercrtc, int, 0400);
unsigned int i915_powersave = 1;
module_param_named(powersave, i915_powersave, int, 0600);
+unsigned int i915_semaphores = 0;
+module_param_named(semaphores, i915_semaphores, int, 0600);
+
unsigned int i915_enable_rc6 = 0;
module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
@@ -254,7 +257,7 @@ void intel_detect_pch (struct drm_device *dev)
}
}
-void __gen6_force_wake_get(struct drm_i915_private *dev_priv)
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
{
int count;
@@ -270,12 +273,22 @@ void __gen6_force_wake_get(struct drm_i915_private *dev_priv)
udelay(10);
}
-void __gen6_force_wake_put(struct drm_i915_private *dev_priv)
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
{
I915_WRITE_NOTRACE(FORCEWAKE, 0);
POSTING_READ(FORCEWAKE);
}
+/*
+ * Poll GT_FIFO_FREE_ENTRIES until it reads at least 20, pausing with
+ * udelay(10) between reads and giving up after 500 polls. Returns
+ * silently whether or not the threshold was reached.
+ */
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
+{
+	u32 free_entries = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
+	int retries;
+
+	for (retries = 500; free_entries < 20 && retries; retries--) {
+		udelay(10);
+		free_entries = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
+	}
+}
+
static int i915_drm_freeze(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 65dfe81d0035..456f40484838 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -956,6 +956,7 @@ extern struct drm_ioctl_desc i915_ioctls[];
extern int i915_max_ioctl;
extern unsigned int i915_fbpercrtc;
extern unsigned int i915_powersave;
+extern unsigned int i915_semaphores;
extern unsigned int i915_lvds_downclock;
extern unsigned int i915_panel_use_ssc;
extern unsigned int i915_enable_rc6;
@@ -1177,6 +1178,9 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
void i915_gem_free_all_phys_object(struct drm_device *dev);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
+uint32_t
+i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj);
+
/* i915_gem_gtt.c */
void i915_gem_restore_gtt_mappings(struct drm_device *dev);
int __must_check i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj);
@@ -1353,22 +1357,32 @@ __i915_write(64, q)
* must be set to prevent GT core from power down and stale values being
* returned.
*/
-void __gen6_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_force_wake_put (struct drm_i915_private *dev_priv);
-static inline u32 i915_safe_read(struct drm_i915_private *dev_priv, u32 reg)
+void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
{
u32 val;
if (dev_priv->info->gen >= 6) {
- __gen6_force_wake_get(dev_priv);
+ __gen6_gt_force_wake_get(dev_priv);
val = I915_READ(reg);
- __gen6_force_wake_put(dev_priv);
+ __gen6_gt_force_wake_put(dev_priv);
} else
val = I915_READ(reg);
return val;
}
+/*
+ * Write @val to register @reg. On gen >= 6 hardware,
+ * __gen6_gt_wait_for_fifo() is called before the write is issued.
+ */
+static inline void
+i915_gt_write(struct drm_i915_private *dev_priv, u32 reg, u32 val)
+{
+	if (dev_priv->info->gen >= 6) {
+		__gen6_gt_wait_for_fifo(dev_priv);
+	}
+
+	I915_WRITE(reg, val);
+}
+
static inline void
i915_write(struct drm_i915_private *dev_priv, u32 reg, u64 val, int len)
{
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index cf4f74c7c6fb..36e66cc5225e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1398,7 +1398,7 @@ i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj)
* Return the required GTT alignment for an object, only taking into account
* unfenced tiled surface requirements.
*/
-static uint32_t
+uint32_t
i915_gem_get_unfenced_gtt_alignment(struct drm_i915_gem_object *obj)
{
struct drm_device *dev = obj->base.dev;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d2f445e825f2..50ab1614571c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -772,8 +772,8 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
if (from == NULL || to == from)
return 0;
- /* XXX gpu semaphores are currently causing hard hangs on SNB mobile */
- if (INTEL_INFO(obj->base.dev)->gen < 6 || IS_MOBILE(obj->base.dev))
+ /* XXX gpu semaphores are implicated in various hard hangs on SNB */
+ if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
return i915_gem_object_wait_rendering(obj, true);
idx = intel_ring_sync_index(from, to);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 22a32b9932c5..d64843e18df2 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -349,14 +349,27 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
(obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
i915_gem_object_fence_ok(obj, args->tiling_mode));
- obj->tiling_changed = true;
- obj->tiling_mode = args->tiling_mode;
- obj->stride = args->stride;
+ /* Rebind if we need a change of alignment */
+ if (!obj->map_and_fenceable) {
+ u32 unfenced_alignment =
+ i915_gem_get_unfenced_gtt_alignment(obj);
+ if (obj->gtt_offset & (unfenced_alignment - 1))
+ ret = i915_gem_object_unbind(obj);
+ }
+
+ if (ret == 0) {
+ obj->tiling_changed = true;
+ obj->tiling_mode = args->tiling_mode;
+ obj->stride = args->stride;
+ }
}
+ /* we have to maintain this existing ABI... */
+ args->stride = obj->stride;
+ args->tiling_mode = obj->tiling_mode;
drm_gem_object_unreference(&obj->base);
mutex_unlock(&dev->struct_mutex);
- return 0;
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 97f946dcc1aa..8a9e08bf1cf7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -316,6 +316,8 @@ static void i915_hotplug_work_func(struct work_struct *work)
struct drm_mode_config *mode_config = &dev->mode_config;
struct intel_encoder *encoder;
+ DRM_DEBUG_KMS("running encoder hotplug functions\n");
+
list_for_each_entry(encoder, &mode_config->encoder_list, base.head)
if (encoder->hot_plug)
encoder->hot_plug(encoder);
@@ -1649,9 +1651,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
} else {
hotplug_mask = SDE_CRT_HOTPLUG | SDE_PORTB_HOTPLUG |
SDE_PORTC_HOTPLUG | SDE_PORTD_HOTPLUG;
- hotplug_mask |= SDE_AUX_MASK | SDE_FDI_MASK | SDE_TRANS_MASK;
- I915_WRITE(FDI_RXA_IMR, 0);
- I915_WRITE(FDI_RXB_IMR, 0);
+ hotplug_mask |= SDE_AUX_MASK;
}
dev_priv->pch_irq_mask = ~hotplug_mask;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 15d94c63918c..2abe240dae58 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3271,6 +3271,8 @@
#define FORCEWAKE 0xA18C
#define FORCEWAKE_ACK 0x130090
+#define GT_FIFO_FREE_ENTRIES 0x120008
+
#define GEN6_RPNSWREQ 0xA008
#define GEN6_TURBO_DISABLE (1<<31)
#define GEN6_FREQUENCY(x) ((x)<<25)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b006536b3d2..49fb54fd9a18 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1219,7 +1219,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
u32 blt_ecoskpd;
/* Make sure blitter notifies FBC of writes */
- __gen6_force_wake_get(dev_priv);
+ __gen6_gt_force_wake_get(dev_priv);
blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
GEN6_BLITTER_LOCK_SHIFT;
@@ -1230,7 +1230,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
GEN6_BLITTER_LOCK_SHIFT);
I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
POSTING_READ(GEN6_BLITTER_ECOSKPD);
- __gen6_force_wake_put(dev_priv);
+ __gen6_gt_force_wake_put(dev_priv);
}
static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -1630,19 +1630,19 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
struct drm_i915_gem_object *obj = to_intel_framebuffer(old_fb)->obj;
wait_event(dev_priv->pending_flip_queue,
+ atomic_read(&dev_priv->mm.wedged) ||
atomic_read(&obj->pending_flip) == 0);
/* Big Hammer, we also need to ensure that any pending
* MI_WAIT_FOR_EVENT inside a user batch buffer on the
* current scanout is retired before unpinning the old
* framebuffer.
+ *
+ * This should only fail upon a hung GPU, in which case we
+ * can safely continue.
*/
ret = i915_gem_object_flush_gpu(obj, false);
- if (ret) {
- i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
- mutex_unlock(&dev->struct_mutex);
- return ret;
- }
+ (void) ret;
}
ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
@@ -2045,6 +2045,31 @@ static void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
atomic_read(&obj->pending_flip) == 0);
}
+/*
+ * Decide whether @crtc is treated as driving a PCH port.
+ *
+ * Walks every encoder on the device's mode_config encoder list and
+ * returns false as soon as an eDP encoder attached to @crtc fails
+ * intel_encoder_is_pch_edp(); returns true otherwise.
+ */
+static bool intel_crtc_driving_pch(struct drm_crtc *crtc)
+{
+	struct drm_mode_config *config = &crtc->dev->mode_config;
+	struct intel_encoder *enc;
+
+	list_for_each_entry(enc, &config->encoder_list, base.head) {
+		/* Only eDP encoders bound to this crtc are of interest. */
+		if (enc->base.crtc != crtc || enc->type != INTEL_OUTPUT_EDP)
+			continue;
+
+		/*
+		 * If there's a non-PCH eDP on this crtc, it must be DP_A, and
+		 * that must be driven by its own crtc; no sharing is possible.
+		 */
+		if (!intel_encoder_is_pch_edp(&enc->base))
+			return false;
+	}
+
+	return true;
+}
+
static void ironlake_crtc_enable(struct drm_crtc *crtc)
{
struct drm_device *dev = crtc->dev;
@@ -2053,6 +2078,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
int pipe = intel_crtc->pipe;
int plane = intel_crtc->plane;
u32 reg, temp;
+ bool is_pch_port = false;
if (intel_crtc->active)
return;
@@ -2066,7 +2092,56 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
}
- ironlake_fdi_enable(crtc);
+ is_pch_port = intel_crtc_driving_pch(crtc);
+
+ if (is_pch_port)
+ ironlake_fdi_enable(crtc);
+ else {
+ /* disable CPU FDI tx and PCH FDI rx */
+ reg = FDI_TX_CTL(pipe);
+ temp = I915_READ(reg);
+ I915_WRITE(reg, temp & ~FDI_TX_ENABLE);
+ POSTING_READ(reg);
+
+ reg = FDI_RX_CTL(pipe);
+ temp = I915_READ(reg);
+ temp &= ~(0x7 << 16);
+ temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
+ I915_WRITE(reg, temp & ~FDI_RX_ENABLE);
+
+ POSTING_READ(reg);
+ udelay(100);
+
+ /* Ironlake workaround, disable clock pointer after downing FDI.
+ * This is a read-modify-write: the mask has to be applied to the
+ * value read back, not to the register offset given to I915_READ().
+ */
+ if (HAS_PCH_IBX(dev))
+ I915_WRITE(FDI_RX_CHICKEN(pipe),
+ I915_READ(FDI_RX_CHICKEN(pipe)) &
+ ~FDI_RX_PHASE_SYNC_POINTER_ENABLE);
+
+ /* still set train pattern 1 */
+ reg = FDI_TX_CTL(pipe);
+ temp = I915_READ(reg);
+ temp &= ~FDI_LINK_TRAIN_NONE;
+ temp |= FDI_LINK_TRAIN_PATTERN_1;
+ I915_WRITE(reg, temp);
+
+ reg = FDI_RX_CTL(pipe);
+ temp = I915_READ(reg);
+ if (HAS_PCH_CPT(dev)) {
+ temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
+ temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
+ } else {
+ temp &= ~FDI_LINK_TRAIN_NONE;
+ temp |= FDI_LINK_TRAIN_PATTERN_1;
+ }
+ /* BPC in FDI rx is consistent with that in PIPECONF */
+ temp &= ~(0x07 << 16);
+ temp |= (I915_READ(PIPECONF(pipe)) & PIPE_BPC_MASK) << 11;
+ I915_WRITE(reg, temp);
+
+ POSTING_READ(reg);
+ udelay(100);
+ }
/* Enable panel fitting for LVDS */
if (dev_priv->pch_pf_size &&
@@ -2100,6 +2175,10 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
intel_flush_display_plane(dev, plane);
}
+ /* Skip the PCH stuff if possible */
+ if (!is_pch_port)
+ goto done;
+
/* For PCH output, training FDI link */
if (IS_GEN6(dev))
gen6_fdi_link_train(crtc);
@@ -2184,7 +2263,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
I915_WRITE(reg, temp | TRANS_ENABLE);
if (wait_for(I915_READ(reg) & TRANS_STATE_ENABLE, 100))
DRM_ERROR("failed to enable transcoder %d\n", pipe);
-
+done:
intel_crtc_load_lut(crtc);
intel_update_fbc(dev);
intel_crtc_update_cursor(crtc, true);
@@ -6203,7 +6282,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
* userspace...
*/
I915_WRITE(GEN6_RC_STATE, 0);
- __gen6_force_wake_get(dev_priv);
+ __gen6_gt_force_wake_get(dev_priv);
/* disable the counters and set deterministic thresholds */
I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6301,7 +6380,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
/* enable all PM interrupts */
I915_WRITE(GEN6_PMINTRMSK, 0);
- __gen6_force_wake_put(dev_priv);
+ __gen6_gt_force_wake_put(dev_priv);
}
void intel_enable_clock_gating(struct drm_device *dev)
@@ -6496,7 +6575,7 @@ static void ironlake_disable_rc6(struct drm_device *dev)
POSTING_READ(RSTDBYCTL);
}
- ironlake_disable_rc6(dev);
+ ironlake_teardown_rc6(dev);
}
static int ironlake_setup_rc6(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index c65992df458d..f8f86e57df22 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -208,7 +208,6 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
val &= ~1;
pci_read_config_byte(dev->pdev, PCI_LBPC, &lbpc);
val *= lbpc;
- val >>= 1;
}
}
@@ -235,11 +234,11 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
if (is_backlight_combination_mode(dev)){
u32 max = intel_panel_get_max_backlight(dev);
- u8 lpbc;
+ u8 lbpc;
- lpbc = level * 0xfe / max + 1;
- level /= lpbc;
- pci_write_config_byte(dev->pdev, PCI_LBPC, lpbc);
+ lbpc = level * 0xfe / max + 1;
+ level /= lbpc;
+ pci_write_config_byte(dev->pdev, PCI_LBPC, lbpc);
}
tmp = I915_READ(BLC_PWM_CTL);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6d6fde85a636..34306865a5df 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,22 +14,23 @@ struct intel_hw_status_page {
struct drm_i915_gem_object *obj;
};
-#define I915_RING_READ(reg) i915_safe_read(dev_priv, reg)
+#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
+#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
-#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)
+#define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base))
-#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)
+#define I915_WRITE_START(ring, val) I915_RING_WRITE(RING_START((ring)->mmio_base), val)
#define I915_READ_HEAD(ring) I915_RING_READ(RING_HEAD((ring)->mmio_base))
-#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)
+#define I915_WRITE_HEAD(ring, val) I915_RING_WRITE(RING_HEAD((ring)->mmio_base), val)
#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base))
-#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)
+#define I915_WRITE_CTL(ring, val) I915_RING_WRITE(RING_CTL((ring)->mmio_base), val)
-#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base))
+#define I915_WRITE_IMR(ring, val) I915_RING_WRITE(RING_IMR((ring)->mmio_base), val)
#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base))
#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base))
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 49e5e99917e2..6bdab891c64e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -6228,7 +6228,7 @@ parse_dcb15_entry(struct drm_device *dev, struct dcb_table *dcb,
entry->tvconf.has_component_output = false;
break;
case OUTPUT_LVDS:
- if ((conn & 0x00003f00) != 0x10)
+ if ((conn & 0x00003f00) >> 8 != 0x10)
entry->lvdsconf.use_straps_for_mode = true;
entry->lvdsconf.use_power_scripts = true;
break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index a7fae26f4654..a52184007f5f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -49,7 +49,10 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
DRM_ERROR("bo %p still attached to GEM object\n", bo);
nv10_mem_put_tile_region(dev, nvbo->tile, NULL);
- nouveau_vm_put(&nvbo->vma);
+ if (nvbo->vma.node) {
+ nouveau_vm_unmap(&nvbo->vma);
+ nouveau_vm_put(&nvbo->vma);
+ }
kfree(nvbo);
}
@@ -128,6 +131,7 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
}
}
+ nvbo->bo.mem.num_pages = size >> PAGE_SHIFT;
nouveau_bo_placement_set(nvbo, flags, 0);
nvbo->channel = chan;
@@ -166,17 +170,17 @@ static void
set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
{
struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
+ int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
if (dev_priv->card_type == NV_10 &&
- nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM)) {
+ nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
+ nvbo->bo.mem.num_pages < vram_pages / 2) {
/*
* Make sure that the color and depth buffers are handled
* by independent memory controller units. Up to a 9x
* speed up when alpha-blending and depth-test are enabled
* at the same time.
*/
- int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
-
if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
nvbo->placement.fpfn = vram_pages / 2;
nvbo->placement.lpfn = ~0;
@@ -785,7 +789,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
goto out;
- ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+ ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem);
out:
ttm_bo_mem_put(bo, &tmp_mem);
return ret;
@@ -811,11 +815,11 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
if (ret)
return ret;
- ret = ttm_bo_move_ttm(bo, evict, no_wait_reserve, no_wait_gpu, &tmp_mem);
+ ret = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem);
if (ret)
goto out;
- ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+ ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_reserve, no_wait_gpu, new_mem);
if (ret)
goto out;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index a21e00076839..390d82c3c4b0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -507,6 +507,7 @@ nouveau_connector_native_mode(struct drm_connector *connector)
int high_w = 0, high_h = 0, high_v = 0;
list_for_each_entry(mode, &nv_connector->base.probed_modes, head) {
+ mode->vrefresh = drm_mode_vrefresh(mode);
if (helper->mode_valid(connector, mode) != MODE_OK ||
(mode->flags & DRM_MODE_FLAG_INTERLACE))
continue;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 65699bfaaaea..b368ed74aad7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -83,7 +83,8 @@ nouveau_dma_init(struct nouveau_channel *chan)
return ret;
/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
- ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy);
+ ret = nouveau_notifier_alloc(chan, NvNotify0, 32, 0xfd0, 0x1000,
+ &chan->m2mf_ntfy);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 9821fcacc3d2..982d70b12722 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -852,7 +852,8 @@ extern const struct ttm_mem_type_manager_func nouveau_vram_manager;
extern int nouveau_notifier_init_channel(struct nouveau_channel *);
extern void nouveau_notifier_takedown_channel(struct nouveau_channel *);
extern int nouveau_notifier_alloc(struct nouveau_channel *, uint32_t handle,
- int cout, uint32_t *offset);
+ int cout, uint32_t start, uint32_t end,
+ uint32_t *offset);
extern int nouveau_notifier_offset(struct nouveau_gpuobj *, uint32_t *);
extern int nouveau_ioctl_notifier_alloc(struct drm_device *, void *data,
struct drm_file *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 26347b7cd872..b0fb9bdcddb7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -725,8 +725,10 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
ret = vram->get(dev, mem->num_pages << PAGE_SHIFT,
mem->page_alignment << PAGE_SHIFT, size_nc,
(nvbo->tile_flags >> 8) & 0xff, &node);
- if (ret)
- return ret;
+ if (ret) {
+ mem->mm_node = NULL;
+ return (ret == -ENOSPC) ? 0 : ret;
+ }
node->page_shift = 12;
if (nvbo->vma.node)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index 8844b50c3e54..7609756b6faf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -123,7 +123,7 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
return 0;
}
- return -ENOMEM;
+ return -ENOSPC;
}
int
diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c
index fe29d604b820..5ea167623a82 100644
--- a/drivers/gpu/drm/nouveau/nouveau_notifier.c
+++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c
@@ -96,7 +96,8 @@ nouveau_notifier_gpuobj_dtor(struct drm_device *dev,
int
nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
- int size, uint32_t *b_offset)
+ int size, uint32_t start, uint32_t end,
+ uint32_t *b_offset)
{
struct drm_device *dev = chan->dev;
struct nouveau_gpuobj *nobj = NULL;
@@ -104,9 +105,10 @@ nouveau_notifier_alloc(struct nouveau_channel *chan, uint32_t handle,
uint32_t offset;
int target, ret;
- mem = drm_mm_search_free(&chan->notifier_heap, size, 0, 0);
+ mem = drm_mm_search_free_in_range(&chan->notifier_heap, size, 0,
+ start, end, 0);
if (mem)
- mem = drm_mm_get_block(mem, size, 0);
+ mem = drm_mm_get_block_range(mem, size, 0, start, end);
if (!mem) {
NV_ERROR(dev, "Channel %d notifier block full\n", chan->id);
return -ENOMEM;
@@ -177,7 +179,8 @@ nouveau_ioctl_notifier_alloc(struct drm_device *dev, void *data,
if (IS_ERR(chan))
return PTR_ERR(chan);
- ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset);
+ ret = nouveau_notifier_alloc(chan, na->handle, na->size, 0, 0x1000,
+ &na->offset);
nouveau_channel_put(&chan);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index f05c0cddfeca..4399e2f34db4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -543,7 +543,7 @@ nouveau_pm_resume(struct drm_device *dev)
struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
struct nouveau_pm_level *perflvl;
- if (pm->cur == &pm->boot)
+ if (!pm->cur || pm->cur == &pm->boot)
return;
perflvl = pm->cur;
diff --git a/drivers/gpu/drm/nouveau/nv04_dfp.c b/drivers/gpu/drm/nouveau/nv04_dfp.c
index ef23550407b5..c82db37d9f41 100644
--- a/drivers/gpu/drm/nouveau/nv04_dfp.c
+++ b/drivers/gpu/drm/nouveau/nv04_dfp.c
@@ -342,8 +342,8 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
if (nv_encoder->dcb->type == OUTPUT_LVDS) {
bool duallink, dummy;
- nouveau_bios_parse_lvds_table(dev, nv_connector->native_mode->
- clock, &duallink, &dummy);
+ nouveau_bios_parse_lvds_table(dev, output_mode->clock,
+ &duallink, &dummy);
if (duallink)
regp->fp_control |= (8 << 28);
} else
@@ -518,8 +518,6 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode)
return;
if (nv_encoder->dcb->lvdsconf.use_power_scripts) {
- struct nouveau_connector *nv_connector = nouveau_encoder_connector_get(nv_encoder);
-
/* when removing an output, crtc may not be set, but PANEL_OFF
* must still be run
*/
@@ -527,12 +525,8 @@ static void nv04_lvds_dpms(struct drm_encoder *encoder, int mode)
nv04_dfp_get_bound_head(dev, nv_encoder->dcb);
if (mode == DRM_MODE_DPMS_ON) {
- if (!nv_connector->native_mode) {
- NV_ERROR(dev, "Not turning on LVDS without native mode\n");
- return;
- }
call_lvds_script(dev, nv_encoder->dcb, head,
- LVDS_PANEL_ON, nv_connector->native_mode->clock);
+ LVDS_PANEL_ON, nv_encoder->mode.clock);
} else
/* pxclk of 0 is fine for PANEL_OFF, and for a
* disconnected LVDS encoder there is no native_mode
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 8870d72388c8..18d30c2c1aa6 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -211,18 +211,32 @@ nv40_graph_set_tile_region(struct drm_device *dev, int i)
struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
switch (dev_priv->chipset) {
+ case 0x40:
+ case 0x41: /* guess */
+ case 0x42:
+ case 0x43:
+ case 0x45: /* guess */
+ case 0x4e:
+ nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
+ nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
+ nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
+ nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tile->pitch);
+ nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
+ nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
+ break;
case 0x44:
case 0x4a:
- case 0x4e:
nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
break;
-
case 0x46:
case 0x47:
case 0x49:
case 0x4b:
+ case 0x4c:
+ case 0x67:
+ default:
nv_wr32(dev, NV47_PGRAPH_TSIZE(i), tile->pitch);
nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), tile->limit);
nv_wr32(dev, NV47_PGRAPH_TILE(i), tile->addr);
@@ -230,15 +244,6 @@ nv40_graph_set_tile_region(struct drm_device *dev, int i)
nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
break;
-
- default:
- nv_wr32(dev, NV20_PGRAPH_TSIZE(i), tile->pitch);
- nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), tile->limit);
- nv_wr32(dev, NV20_PGRAPH_TILE(i), tile->addr);
- nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tile->pitch);
- nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tile->limit);
- nv_wr32(dev, NV40_PGRAPH_TILE1(i), tile->addr);
- break;
}
}
@@ -396,17 +401,20 @@ nv40_graph_init(struct drm_device *dev)
break;
default:
switch (dev_priv->chipset) {
- case 0x46:
- case 0x47:
- case 0x49:
- case 0x4b:
- nv_wr32(dev, 0x400DF0, nv_rd32(dev, NV04_PFB_CFG0));
- nv_wr32(dev, 0x400DF4, nv_rd32(dev, NV04_PFB_CFG1));
- break;
- default:
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x45:
+ case 0x4e:
+ case 0x44:
+ case 0x4a:
nv_wr32(dev, 0x4009F0, nv_rd32(dev, NV04_PFB_CFG0));
nv_wr32(dev, 0x4009F4, nv_rd32(dev, NV04_PFB_CFG1));
break;
+ default:
+ nv_wr32(dev, 0x400DF0, nv_rd32(dev, NV04_PFB_CFG0));
+ nv_wr32(dev, 0x400DF4, nv_rd32(dev, NV04_PFB_CFG1));
+ break;
}
nv_wr32(dev, 0x4069F0, nv_rd32(dev, NV04_PFB_CFG0));
nv_wr32(dev, 0x4069F4, nv_rd32(dev, NV04_PFB_CFG1));
diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c
index ea0041810ae3..e57caa2a00e3 100644
--- a/drivers/gpu/drm/nouveau/nv50_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv50_instmem.c
@@ -403,16 +403,24 @@ nv50_instmem_unmap(struct nouveau_gpuobj *gpuobj)
void
nv50_instmem_flush(struct drm_device *dev)
{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ spin_lock(&dev_priv->ramin_lock);
nv_wr32(dev, 0x00330c, 0x00000001);
if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000))
NV_ERROR(dev, "PRAMIN flush timeout\n");
+ spin_unlock(&dev_priv->ramin_lock);
}
void
nv84_instmem_flush(struct drm_device *dev)
{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ spin_lock(&dev_priv->ramin_lock);
nv_wr32(dev, 0x070000, 0x00000001);
if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000))
NV_ERROR(dev, "PRAMIN flush timeout\n");
+ spin_unlock(&dev_priv->ramin_lock);
}
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 459ff08241e5..6144156f255a 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -169,7 +169,11 @@ nv50_vm_flush(struct nouveau_vm *vm)
void
nv50_vm_flush_engine(struct drm_device *dev, int engine)
{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+ spin_lock(&dev_priv->ramin_lock);
nv_wr32(dev, 0x100c80, (engine << 16) | 1);
if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000))
NV_ERROR(dev, "vm flush timeout: engine %d\n", engine);
+ spin_unlock(&dev_priv->ramin_lock);
}
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 095bc507fb16..a4e5e53e0a62 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -557,9 +557,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
/* use recommended ref_div for ss */
if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
- pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
if (ss_enabled) {
if (ss->refdiv) {
+ pll->flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP;
pll->flags |= RADEON_PLL_USE_REF_DIV;
pll->reference_div = ss->refdiv;
if (ASIC_IS_AVIVO(rdev))
@@ -662,10 +662,12 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
index, (uint32_t *)&args);
adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10;
if (args.v3.sOutput.ucRefDiv) {
+ pll->flags |= RADEON_PLL_USE_FRAC_FB_DIV;
pll->flags |= RADEON_PLL_USE_REF_DIV;
pll->reference_div = args.v3.sOutput.ucRefDiv;
}
if (args.v3.sOutput.ucPostDiv) {
+ pll->flags |= RADEON_PLL_USE_FRAC_FB_DIV;
pll->flags |= RADEON_PLL_USE_POST_DIV;
pll->post_div = args.v3.sOutput.ucPostDiv;
}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index d270b3ff896b..6140ea1de45a 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2194,7 +2194,6 @@ int evergreen_mc_init(struct radeon_device *rdev)
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
}
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r700_vram_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
@@ -2934,7 +2933,7 @@ static int evergreen_startup(struct radeon_device *rdev)
/* XXX: ontario has problems blitting to gart at the moment */
if (rdev->family == CHIP_PALM) {
rdev->asic->copy = NULL;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
}
/* allocate wb buffer */
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2adfb03f479b..2be698e78ff2 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -623,7 +623,7 @@ done:
dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
return r;
}
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -631,7 +631,7 @@ void evergreen_blit_fini(struct radeon_device *rdev)
{
int r;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
if (rdev->r600_blit.shader_obj == NULL)
return;
/* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 56deae5bf02e..e372f9e1e5ce 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -70,23 +70,6 @@ MODULE_FIRMWARE(FIRMWARE_R520);
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
- struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
- u32 tmp;
-
- /* make sure flip is at vb rather than hb */
- tmp = RREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset);
- tmp &= ~RADEON_CRTC_OFFSET_FLIP_CNTL;
- /* make sure pending bit is asserted */
- tmp |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
- WREG32(RADEON_CRTC_OFFSET_CNTL + radeon_crtc->crtc_offset, tmp);
-
- /* set pageflip to happen as late as possible in the vblank interval.
- * same field for crtc1/2
- */
- tmp = RREG32(RADEON_CRTC_GEN_CNTL);
- tmp &= ~RADEON_CRTC_VSTAT_MODE_MASK;
- WREG32(RADEON_CRTC_GEN_CNTL, tmp);
-
/* enable the pflip int */
radeon_irq_kms_pflip_irq_get(rdev, crtc);
}
@@ -1041,7 +1024,7 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
return r;
}
rdev->cp.ready = true;
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -1059,7 +1042,7 @@ void r100_cp_fini(struct radeon_device *rdev)
void r100_cp_disable(struct radeon_device *rdev)
{
/* Disable ring */
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
rdev->cp.ready = false;
WREG32(RADEON_CP_CSQ_MODE, 0);
WREG32(RADEON_CP_CSQ_CNTL, 0);
@@ -2329,7 +2312,6 @@ void r100_vram_init_sizes(struct radeon_device *rdev)
/* FIXME we don't use the second aperture yet when we could use it */
if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
if (rdev->flags & RADEON_IS_IGP) {
uint32_t tom;
@@ -3490,7 +3472,7 @@ void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track
track->num_texture = 16;
track->maxy = 4096;
track->separate_cube = 0;
- track->aaresolve = true;
+ track->aaresolve = false;
track->aa.robj = NULL;
}
@@ -3801,8 +3783,6 @@ static int r100_startup(struct radeon_device *rdev)
r100_mc_program(rdev);
/* Resume clock */
r100_clock_startup(rdev);
- /* Initialize GPU configuration (# pipes, ...) */
-// r100_gpu_init(rdev);
/* Initialize GART (initialize after TTM so we can allocate
* memory through TTM but finalize after TTM) */
r100_enable_bm(rdev);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 768c60ee4ab6..069efa8c8ecf 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -910,6 +910,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
break;
case R300_TX_FORMAT_X16:
+ case R300_TX_FORMAT_FL_I16:
case R300_TX_FORMAT_Y8X8:
case R300_TX_FORMAT_Z5Y6X5:
case R300_TX_FORMAT_Z6Y5X5:
@@ -922,6 +923,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
track->textures[i].compress_format = R100_TRACK_COMP_NONE;
break;
case R300_TX_FORMAT_Y16X16:
+ case R300_TX_FORMAT_FL_I16A16:
case R300_TX_FORMAT_Z11Y11X10:
case R300_TX_FORMAT_Z10Y11X11:
case R300_TX_FORMAT_W8Z8Y8X8:
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index de88624d5f87..9b3fad23b76c 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1255,7 +1255,6 @@ int r600_mc_init(struct radeon_device *rdev)
rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r600_vram_gtt_location(rdev, &rdev->mc);
if (rdev->flags & RADEON_IS_IGP) {
@@ -1937,7 +1936,7 @@ void r600_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
*/
void r600_cp_stop(struct radeon_device *rdev)
{
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
WREG32(SCRATCH_UMSK, 0);
}
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 41f7aafc97c4..df68d91e8190 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -558,7 +558,7 @@ done:
dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
return r;
}
- rdev->mc.active_vram_size = rdev->mc.real_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
@@ -566,7 +566,7 @@ void r600_blit_fini(struct radeon_device *rdev)
{
int r;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
if (rdev->r600_blit.shader_obj == NULL)
return;
/* If we can't reserve the bo, unref should be enough to destroy
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 56c48b67ef3d..6b3429495118 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -345,7 +345,6 @@ struct radeon_mc {
* about vram size near mc fb location */
u64 mc_vram_size;
u64 visible_vram_size;
- u64 active_vram_size;
u64 gtt_size;
u64 gtt_start;
u64 gtt_end;
@@ -1448,6 +1447,7 @@ extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *m
extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern int radeon_resume_kms(struct drm_device *dev);
extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
+extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
/* r600, rv610, rv630, rv620, rv635, rv670, rs780, rs880 */
extern bool r600_card_posted(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index e75d63b8e21d..793c5e6026ad 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -834,6 +834,9 @@ static struct radeon_asic sumo_asic = {
.pm_finish = &evergreen_pm_finish,
.pm_init_profile = &rs780_pm_init_profile,
.pm_get_dynpm_state = &r600_pm_get_dynpm_state,
+ .pre_page_flip = &evergreen_pre_page_flip,
+ .page_flip = &evergreen_page_flip,
+ .post_page_flip = &evergreen_post_page_flip,
};
static struct radeon_asic btc_asic = {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 0e657095de7c..3e7e7f9eb781 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -971,7 +971,7 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll,
max_fractional_feed_div = pll->max_frac_feedback_div;
}
- for (post_div = min_post_div; post_div <= max_post_div; ++post_div) {
+ for (post_div = max_post_div; post_div >= min_post_div; --post_div) {
uint32_t ref_div;
if ((pll->flags & RADEON_PLL_NO_ODD_POST_DIV) && (post_div & 1))
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 66324b5bb5ba..cc44bdfec80f 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -113,11 +113,14 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev,
u32 tiling_flags = 0;
int ret;
int aligned_size, size;
+ int height = mode_cmd->height;
/* need to align pitch with crtc limits */
mode_cmd->pitch = radeon_align_pitch(rdev, mode_cmd->width, mode_cmd->bpp, fb_tiled) * ((mode_cmd->bpp + 1) / 8);
- size = mode_cmd->pitch * mode_cmd->height;
+ if (rdev->family >= CHIP_R600)
+ height = ALIGN(mode_cmd->height, 8);
+ size = mode_cmd->pitch * height;
aligned_size = ALIGN(size, PAGE_SIZE);
ret = radeon_gem_object_create(rdev, aligned_size, 0,
RADEON_GEM_DOMAIN_VRAM,
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index df95eb83dac6..1fe95dfe48c9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -156,9 +156,12 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
{
struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_info *args = data;
+ struct ttm_mem_type_manager *man;
+
+ man = &rdev->mman.bdev.man[TTM_PL_VRAM];
args->vram_size = rdev->mc.real_vram_size;
- args->vram_visible = rdev->mc.real_vram_size;
+ args->vram_visible = (u64)man->size << PAGE_SHIFT;
if (rdev->stollen_vga_memory)
args->vram_visible -= radeon_bo_size(rdev->stollen_vga_memory);
args->vram_visible -= radeon_fbdev_total_size(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index cf0638c3b7c7..78968b738e88 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -443,7 +443,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
(target_fb->bits_per_pixel * 8));
crtc_pitch |= crtc_pitch << 16;
-
+ crtc_offset_cntl |= RADEON_CRTC_GUI_TRIG_OFFSET_LEFT_EN;
if (tiling_flags & RADEON_TILING_MACRO) {
if (ASIC_IS_R300(rdev))
crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN |
@@ -502,6 +502,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
gen_cntl_val = RREG32(gen_cntl_reg);
gen_cntl_val &= ~(0xf << 8);
gen_cntl_val |= (format << 8);
+ gen_cntl_val &= ~RADEON_CRTC_VSTAT_MODE_MASK;
WREG32(gen_cntl_reg, gen_cntl_val);
crtc_offset = (u32)base;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e5b2cf10cbf4..8389b4c63d12 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -589,6 +589,20 @@ void radeon_ttm_fini(struct radeon_device *rdev)
DRM_INFO("radeon: ttm finalized\n");
}
+/* this should only be called at bootup or when userspace
+ * isn't running */
+void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
+{
+ struct ttm_mem_type_manager *man;
+
+ if (!rdev->mman.initialized)
+ return;
+
+ man = &rdev->mman.bdev.man[TTM_PL_VRAM];
+ /* this just adjusts TTM size idea, which sets lpfn to the correct value */
+ man->size = size >> PAGE_SHIFT;
+}
+
static struct vm_operations_struct radeon_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops = NULL;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 5afe294ed51f..8af4679db23e 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -751,7 +751,6 @@ void rs600_mc_init(struct radeon_device *rdev)
rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
base = RREG32_MC(R_000004_MC_FB_LOCATION);
base = G_000004_MC_FB_START(base) << 16;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 6638c8e4c81b..66c949b7c18c 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -157,7 +157,6 @@ void rs690_mc_init(struct radeon_device *rdev)
rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
base = RREG32_MC(R_000100_MCCFG_FB_LOCATION);
base = G_000100_MC_FB_START(base) << 16;
rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d8ba67690656..714ad45757d0 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -307,7 +307,7 @@ static void rv770_mc_program(struct radeon_device *rdev)
*/
void r700_cp_stop(struct radeon_device *rdev)
{
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
+ radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0);
}
@@ -1123,7 +1123,6 @@ int rv770_mc_init(struct radeon_device *rdev)
rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE);
rdev->mc.visible_vram_size = rdev->mc.aper_size;
- rdev->mc.active_vram_size = rdev->mc.visible_vram_size;
r700_vram_gtt_location(rdev, &rdev->mc);
radeon_update_bandwidth_info(rdev);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 773e484f1646..297bc9a7d6e6 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -238,13 +238,13 @@ config SENSORS_K8TEMP
will be called k8temp.
config SENSORS_K10TEMP
- tristate "AMD Phenom/Sempron/Turion/Opteron temperature sensor"
+ tristate "AMD Family 10h/11h/12h/14h temperature sensor"
depends on X86 && PCI
help
If you say yes here you get support for the temperature
sensor(s) inside your CPU. Supported are later revisions of
- the AMD Family 10h and all revisions of the AMD Family 11h
- microarchitectures.
+ the AMD Family 10h and all revisions of the AMD Family 11h,
+ 12h (Llano), and 14h (Brazos) microarchitectures.
This driver can also be built as a module. If so, the module
will be called k10temp.
@@ -455,13 +455,14 @@ config SENSORS_JZ4740
called jz4740-hwmon.
config SENSORS_JC42
- tristate "JEDEC JC42.4 compliant temperature sensors"
+ tristate "JEDEC JC42.4 compliant memory module temperature sensors"
depends on I2C
help
- If you say yes here you get support for Jedec JC42.4 compliant
- temperature sensors. Support will include, but not be limited to,
- ADT7408, CAT34TS02,, CAT6095, MAX6604, MCP9805, MCP98242, MCP98243,
- MCP9843, SE97, SE98, STTS424, TSE2002B3, and TS3000B3.
+ If you say yes here, you get support for JEDEC JC42.4 compliant
+ temperature sensors, which are used on many DDR3 memory modules for
+ mobile devices and servers. Support will include, but not be limited
+ to, ADT7408, CAT34TS02, CAT6095, MAX6604, MCP9805, MCP98242, MCP98243,
+ MCP9843, SE97, SE98, STTS424(E), TSE2002B3, and TS3000B3.
This driver can also be built as a module. If so, the module
will be called jc42.
@@ -574,7 +575,7 @@ config SENSORS_LM85
help
If you say yes here you get support for National Semiconductor LM85
sensor chips and clones: ADM1027, ADT7463, ADT7468, EMC6D100,
- EMC6D101 and EMC6D102.
+ EMC6D101, EMC6D102, and EMC6D103.
This driver can also be built as a module. If so, the module
will be called lm85.
diff --git a/drivers/hwmon/ad7414.c b/drivers/hwmon/ad7414.c
index 86d822aa9bbf..d46c0c758ddf 100644
--- a/drivers/hwmon/ad7414.c
+++ b/drivers/hwmon/ad7414.c
@@ -242,6 +242,7 @@ static const struct i2c_device_id ad7414_id[] = {
{ "ad7414", 0 },
{}
};
+MODULE_DEVICE_TABLE(i2c, ad7414_id);
static struct i2c_driver ad7414_driver = {
.driver = {
diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
index f13c843a2964..5cc3e3784b42 100644
--- a/drivers/hwmon/adt7411.c
+++ b/drivers/hwmon/adt7411.c
@@ -334,6 +334,7 @@ static const struct i2c_device_id adt7411_id[] = {
{ "adt7411", 0 },
{ }
};
+MODULE_DEVICE_TABLE(i2c, adt7411_id);
static struct i2c_driver adt7411_driver = {
.driver = {
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 3f49dd376f02..6e06019015a5 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -37,7 +37,7 @@
#define SIO_F71858FG_LD_HWM 0x02 /* Hardware monitor logical device */
#define SIO_F71882FG_LD_HWM 0x04 /* Hardware monitor logical device */
#define SIO_UNLOCK_KEY 0x87 /* Key to enable Super-I/O */
-#define SIO_LOCK_KEY 0xAA /* Key to diasble Super-I/O */
+#define SIO_LOCK_KEY 0xAA /* Key to disable Super-I/O */
#define SIO_REG_LDSEL 0x07 /* Logical device select */
#define SIO_REG_DEVID 0x20 /* Device ID (2 bytes) */
@@ -2111,7 +2111,6 @@ static int f71882fg_remove(struct platform_device *pdev)
int nr_fans = (data->type == f71882fg) ? 4 : 3;
u8 start_reg = f71882fg_read8(data, F71882FG_REG_START);
- platform_set_drvdata(pdev, NULL);
if (data->hwmon_dev)
hwmon_device_unregister(data->hwmon_dev);
@@ -2178,6 +2177,7 @@ static int f71882fg_remove(struct platform_device *pdev)
}
}
+ platform_set_drvdata(pdev, NULL);
kfree(data);
return 0;
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 340fc78c8dde..934991237061 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -53,6 +53,8 @@ static const unsigned short normal_i2c[] = {
/* Configuration register defines */
#define JC42_CFG_CRIT_ONLY (1 << 2)
+#define JC42_CFG_TCRIT_LOCK (1 << 6)
+#define JC42_CFG_EVENT_LOCK (1 << 7)
#define JC42_CFG_SHUTDOWN (1 << 8)
#define JC42_CFG_HYST_SHIFT 9
#define JC42_CFG_HYST_MASK 0x03
@@ -332,7 +334,7 @@ static ssize_t set_temp_crit_hyst(struct device *dev,
{
struct i2c_client *client = to_i2c_client(dev);
struct jc42_data *data = i2c_get_clientdata(client);
- long val;
+ unsigned long val;
int diff, hyst;
int err;
int ret = count;
@@ -380,14 +382,14 @@ static ssize_t show_alarm(struct device *dev,
static DEVICE_ATTR(temp1_input, S_IRUGO,
show_temp_input, NULL);
-static DEVICE_ATTR(temp1_crit, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(temp1_crit, S_IRUGO,
show_temp_crit, set_temp_crit);
-static DEVICE_ATTR(temp1_min, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(temp1_min, S_IRUGO,
show_temp_min, set_temp_min);
-static DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(temp1_max, S_IRUGO,
show_temp_max, set_temp_max);
-static DEVICE_ATTR(temp1_crit_hyst, S_IWUSR | S_IRUGO,
+static DEVICE_ATTR(temp1_crit_hyst, S_IRUGO,
show_temp_crit_hyst, set_temp_crit_hyst);
static DEVICE_ATTR(temp1_max_hyst, S_IRUGO,
show_temp_max_hyst, NULL);
@@ -412,8 +414,31 @@ static struct attribute *jc42_attributes[] = {
NULL
};
+static mode_t jc42_attribute_mode(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct i2c_client *client = to_i2c_client(dev);
+ struct jc42_data *data = i2c_get_clientdata(client);
+ unsigned int config = data->config;
+ bool readonly;
+
+ if (attr == &dev_attr_temp1_crit.attr)
+ readonly = config & JC42_CFG_TCRIT_LOCK;
+ else if (attr == &dev_attr_temp1_min.attr ||
+ attr == &dev_attr_temp1_max.attr)
+ readonly = config & JC42_CFG_EVENT_LOCK;
+ else if (attr == &dev_attr_temp1_crit_hyst.attr)
+ readonly = config & (JC42_CFG_EVENT_LOCK | JC42_CFG_TCRIT_LOCK);
+ else
+ readonly = true;
+
+ return S_IRUGO | (readonly ? 0 : S_IWUSR);
+}
+
static const struct attribute_group jc42_group = {
.attrs = jc42_attributes,
+ .is_visible = jc42_attribute_mode,
};
/* Return 0 if detection is successful, -ENODEV otherwise */
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index da5a2404cd3e..82bf65aa2968 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -1,5 +1,5 @@
/*
- * k10temp.c - AMD Family 10h/11h processor hardware monitoring
+ * k10temp.c - AMD Family 10h/11h/12h/14h processor hardware monitoring
*
* Copyright (c) 2009 Clemens Ladisch <clemens@ladisch.de>
*
@@ -25,7 +25,7 @@
#include <linux/pci.h>
#include <asm/processor.h>
-MODULE_DESCRIPTION("AMD Family 10h/11h CPU core temperature monitor");
+MODULE_DESCRIPTION("AMD Family 10h/11h/12h/14h CPU core temperature monitor");
MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
MODULE_LICENSE("GPL");
@@ -208,6 +208,7 @@ static void __devexit k10temp_remove(struct pci_dev *pdev)
static const struct pci_device_id k10temp_id_table[] = {
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{}
};
MODULE_DEVICE_TABLE(pci, k10temp_id_table);
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 1e229847f37a..d2cc28660816 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -41,7 +41,7 @@ static const unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END };
enum chips {
any_chip, lm85b, lm85c,
adm1027, adt7463, adt7468,
- emc6d100, emc6d102
+ emc6d100, emc6d102, emc6d103
};
/* The LM85 registers */
@@ -90,6 +90,9 @@ enum chips {
#define LM85_VERSTEP_EMC6D100_A0 0x60
#define LM85_VERSTEP_EMC6D100_A1 0x61
#define LM85_VERSTEP_EMC6D102 0x65
+#define LM85_VERSTEP_EMC6D103_A0 0x68
+#define LM85_VERSTEP_EMC6D103_A1 0x69
+#define LM85_VERSTEP_EMC6D103S 0x6A /* Also known as EMC6D103:A2 */
#define LM85_REG_CONFIG 0x40
@@ -348,6 +351,7 @@ static const struct i2c_device_id lm85_id[] = {
{ "emc6d100", emc6d100 },
{ "emc6d101", emc6d100 },
{ "emc6d102", emc6d102 },
+ { "emc6d103", emc6d103 },
{ }
};
MODULE_DEVICE_TABLE(i2c, lm85_id);
@@ -1250,6 +1254,20 @@ static int lm85_detect(struct i2c_client *client, struct i2c_board_info *info)
case LM85_VERSTEP_EMC6D102:
type_name = "emc6d102";
break;
+ case LM85_VERSTEP_EMC6D103_A0:
+ case LM85_VERSTEP_EMC6D103_A1:
+ type_name = "emc6d103";
+ break;
+ /*
+ * Registers apparently missing in EMC6D103S/EMC6D103:A2
+ * compared to EMC6D103:A0, EMC6D103:A1, and EMC6D102
+ * (according to the data sheets), but used unconditionally
+ * in the driver: 62[5:7], 6D[0:7], and 6E[0:7].
+ * So skip EMC6D103S for now.
+ case LM85_VERSTEP_EMC6D103S:
+ type_name = "emc6d103s";
+ break;
+ */
}
} else {
dev_dbg(&adapter->dev,
@@ -1283,6 +1301,7 @@ static int lm85_probe(struct i2c_client *client,
case adt7468:
case emc6d100:
case emc6d102:
+ case emc6d103:
data->freq_map = adm1027_freq_map;
break;
default:
@@ -1468,7 +1487,7 @@ static struct lm85_data *lm85_update_device(struct device *dev)
/* More alarm bits */
data->alarms |= lm85_read_value(client,
EMC6D100_REG_ALARM3) << 16;
- } else if (data->type == emc6d102) {
+ } else if (data->type == emc6d102 || data->type == emc6d103) {
/* Have to read LSB bits after the MSB ones because
the reading of the MSB bits has frozen the
LSBs (backward from the ADM1027).
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 2e067dd2ee51..50ea1f43bdc1 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -29,6 +29,7 @@
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/ktime.h>
+#include <linux/slab.h>
#define PCH_EVENT_SET 0 /* I2C Interrupt Event Set Status */
#define PCH_EVENT_NONE 1 /* I2C Interrupt Event Clear Status */
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index ef3bcb1ce864..1b46a9d9f907 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -249,7 +249,7 @@ static struct i2c_adapter ocores_adapter = {
static int ocores_i2c_of_probe(struct platform_device* pdev,
struct ocores_i2c* i2c)
{
- __be32* val;
+ const __be32* val;
val = of_get_property(pdev->dev.of_node, "regstep", NULL);
if (!val) {
@@ -330,9 +330,7 @@ static int __devinit ocores_i2c_probe(struct platform_device *pdev)
i2c->adap = ocores_adapter;
i2c_set_adapdata(&i2c->adap, i2c);
i2c->adap.dev.parent = &pdev->dev;
-#ifdef CONFIG_OF
i2c->adap.dev.of_node = pdev->dev.of_node;
-#endif
/* add i2c adapter to i2c tree */
ret = i2c_add_adapter(&i2c->adap);
@@ -390,15 +388,11 @@ static int ocores_i2c_resume(struct platform_device *pdev)
#define ocores_i2c_resume NULL
#endif
-#ifdef CONFIG_OF
static struct of_device_id ocores_i2c_match[] = {
- {
- .compatible = "opencores,i2c-ocores",
- },
- {},
+ { .compatible = "opencores,i2c-ocores", },
+ {},
};
MODULE_DEVICE_TABLE(of, ocores_i2c_match);
-#endif
/* work with hotplug and coldplug */
MODULE_ALIAS("platform:ocores-i2c");
@@ -411,9 +405,7 @@ static struct platform_driver ocores_i2c_driver = {
.driver = {
.owner = THIS_MODULE,
.name = "ocores-i2c",
-#ifdef CONFIG_OF
- .of_match_table = ocores_i2c_match,
-#endif
+ .of_match_table = ocores_i2c_match,
},
};
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index b605ff3a1fa0..58a58c7eaa17 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -378,9 +378,7 @@ static int omap_i2c_init(struct omap_i2c_dev *dev)
* REVISIT: Some wkup sources might not be needed.
*/
dev->westate = OMAP_I2C_WE_ALL;
- if (dev->rev < OMAP_I2C_REV_ON_4430)
- omap_i2c_write_reg(dev, OMAP_I2C_WE_REG,
- dev->westate);
+ omap_i2c_write_reg(dev, OMAP_I2C_WE_REG, dev->westate);
}
}
omap_i2c_write_reg(dev, OMAP_I2C_CON_REG, 0);
@@ -847,11 +845,15 @@ complete:
dev_err(dev->dev, "Arbitration lost\n");
err |= OMAP_I2C_STAT_AL;
}
+ /*
+ * ProDB0017052: Clear ARDY bit twice
+ */
if (stat & (OMAP_I2C_STAT_ARDY | OMAP_I2C_STAT_NACK |
OMAP_I2C_STAT_AL)) {
omap_i2c_ack_stat(dev, stat &
(OMAP_I2C_STAT_RRDY | OMAP_I2C_STAT_RDR |
- OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR));
+ OMAP_I2C_STAT_XRDY | OMAP_I2C_STAT_XDR |
+ OMAP_I2C_STAT_ARDY));
omap_i2c_complete_cmd(dev, err);
return IRQ_HANDLED;
}
@@ -1137,12 +1139,41 @@ omap_i2c_remove(struct platform_device *pdev)
return 0;
}
+#ifdef CONFIG_SUSPEND
+static int omap_i2c_suspend(struct device *dev)
+{
+ if (!pm_runtime_suspended(dev))
+ if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend)
+ dev->bus->pm->runtime_suspend(dev);
+
+ return 0;
+}
+
+static int omap_i2c_resume(struct device *dev)
+{
+ if (!pm_runtime_suspended(dev))
+ if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume)
+ dev->bus->pm->runtime_resume(dev);
+
+ return 0;
+}
+
+static struct dev_pm_ops omap_i2c_pm_ops = {
+ .suspend = omap_i2c_suspend,
+ .resume = omap_i2c_resume,
+};
+#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops)
+#else
+#define OMAP_I2C_PM_OPS NULL
+#endif
+
static struct platform_driver omap_i2c_driver = {
.probe = omap_i2c_probe,
.remove = omap_i2c_remove,
.driver = {
.name = "omap_i2c",
.owner = THIS_MODULE,
+ .pm = OMAP_I2C_PM_OPS,
},
};
diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c
index 495be451d326..266135ddf7fa 100644
--- a/drivers/i2c/busses/i2c-stu300.c
+++ b/drivers/i2c/busses/i2c-stu300.c
@@ -942,7 +942,7 @@ stu300_probe(struct platform_device *pdev)
adap->owner = THIS_MODULE;
/* DDC class but actually often used for more generic I2C */
adap->class = I2C_CLASS_DDC;
- strncpy(adap->name, "ST Microelectronics DDC I2C adapter",
+ strlcpy(adap->name, "ST Microelectronics DDC I2C adapter",
sizeof(adap->name));
adap->nr = bus_nr;
adap->algo = &stu300_algo;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index f0bd5bcdf563..045ba6efea48 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -537,9 +537,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
client->dev.parent = &client->adapter->dev;
client->dev.bus = &i2c_bus_type;
client->dev.type = &i2c_client_type;
-#ifdef CONFIG_OF
client->dev.of_node = info->of_node;
-#endif
dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
client->addr);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 1fa091e05690..4a5c4a44ffb1 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -62,6 +62,7 @@
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <asm/mwait.h>
+#include <asm/msr.h>
#define INTEL_IDLE_VERSION "0.4"
#define PREFIX "intel_idle: "
@@ -85,6 +86,12 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state);
static struct cpuidle_state *cpuidle_state_table;
/*
+ * Hardware C-state auto-demotion may not always be optimal.
+ * Indicate which enable bits to clear here.
+ */
+static unsigned long long auto_demotion_disable_flags;
+
+/*
* Set this flag for states where the HW flushes the TLB for us
* and so we don't need cross-calls to keep it consistent.
* If this flag is set, SW flushes the TLB, so even if the
@@ -281,6 +288,15 @@ static struct notifier_block setup_broadcast_notifier = {
.notifier_call = setup_broadcast_cpuhp_notify,
};
+static void auto_demotion_disable(void *dummy)
+{
+ unsigned long long msr_bits;
+
+ rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+ msr_bits &= ~auto_demotion_disable_flags;
+ wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
+}
+
/*
* intel_idle_probe()
*/
@@ -324,11 +340,17 @@ static int intel_idle_probe(void)
case 0x25: /* Westmere */
case 0x2C: /* Westmere */
cpuidle_state_table = nehalem_cstates;
+ auto_demotion_disable_flags =
+ (NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE);
break;
case 0x1C: /* 28 - Atom Processor */
+ cpuidle_state_table = atom_cstates;
+ break;
+
case 0x26: /* 38 - Lincroft Atom Processor */
cpuidle_state_table = atom_cstates;
+ auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE;
break;
case 0x2A: /* SNB */
@@ -436,6 +458,8 @@ static int intel_idle_cpuidle_devices_init(void)
return -EIO;
}
}
+ if (auto_demotion_disable_flags)
+ smp_call_function(auto_demotion_disable, NULL, 1);
return 0;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 64e0903091a8..f804e28e1ebb 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1988,6 +1988,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
goto out;
}
+ if (cm_id->lap_state == IB_CM_LAP_SENT ||
+ cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+
ret = cm_alloc_msg(cm_id_priv, &msg);
if (ret) {
cm_enter_timewait(cm_id_priv);
@@ -2129,6 +2133,10 @@ static int cm_dreq_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ break;
case IB_CM_MRA_REP_RCVD:
break;
case IB_CM_TIMEWAIT:
@@ -2349,9 +2357,18 @@ static int cm_rej_handler(struct cm_work *work)
/* fall through */
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- case IB_CM_ESTABLISHED:
cm_enter_timewait(cm_id_priv);
break;
+ case IB_CM_ESTABLISHED:
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
+ cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
+ ib_cancel_mad(cm_id_priv->av.port->mad_agent,
+ cm_id_priv->msg);
+ cm_enter_timewait(cm_id_priv);
+ break;
+ }
+ /* fall through */
default:
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
@@ -2989,6 +3006,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
goto out; /* No match. */
}
atomic_inc(&cur_cm_id_priv->refcount);
+ atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde1..5ed9d25d021a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -308,11 +308,13 @@ static inline void release_mc(struct kref *kref)
kfree(mc);
}
-static void cma_detach_from_dev(struct rdma_id_private *id_priv)
+static void cma_release_dev(struct rdma_id_private *id_priv)
{
+ mutex_lock(&lock);
list_del(&id_priv->list);
cma_deref_dev(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ mutex_unlock(&lock);
}
static int cma_set_qkey(struct rdma_id_private *id_priv)
@@ -373,6 +375,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+ mutex_lock(&lock);
iboe_addr_get_sgid(dev_addr, &iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
rdma_addr_gid_offset(dev_addr), sizeof gid);
@@ -398,6 +401,7 @@ out:
if (!ret)
cma_attach_to_dev(id_priv, cma_dev);
+ mutex_unlock(&lock);
return ret;
}
@@ -904,9 +908,14 @@ void rdma_destroy_id(struct rdma_cm_id *id)
state = cma_exch(id_priv, CMA_DESTROYING);
cma_cancel_operation(id_priv, state);
- mutex_lock(&lock);
+ /*
+ * Wait for any active callback to finish. New callbacks will find
+ * the id_priv state set to destroying and abort.
+ */
+ mutex_lock(&id_priv->handler_mutex);
+ mutex_unlock(&id_priv->handler_mutex);
+
if (id_priv->cma_dev) {
- mutex_unlock(&lock);
switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
case RDMA_TRANSPORT_IB:
if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
@@ -920,10 +929,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
break;
}
cma_leave_mc_groups(id_priv);
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
+ cma_release_dev(id_priv);
}
- mutex_unlock(&lock);
cma_release_port(id_priv);
cma_deref_id(id_priv);
@@ -1200,9 +1207,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret)
goto release_conn_id;
@@ -1210,6 +1215,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (!ret) {
/*
@@ -1222,8 +1232,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
mutex_unlock(&lock);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
goto out;
}
+ cma_deref_id(conn_id);
/* Destroy the CM ID by returning a non-zero value. */
conn_id->cm_id.ib = NULL;
@@ -1394,9 +1406,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
goto out;
}
- mutex_lock(&lock);
ret = cma_acquire_dev(conn_id);
- mutex_unlock(&lock);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
@@ -1425,17 +1435,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
event.param.conn.private_data_len = iw_event->private_data_len;
event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
event.param.conn.responder_resources = attr.max_qp_rd_atom;
+
+ /*
+ * Protect against the user destroying conn_id from another thread
+ * until we're done accessing it.
+ */
+ atomic_inc(&conn_id->refcount);
ret = conn_id->id.event_handler(&conn_id->id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, CMA_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
goto out;
}
mutex_unlock(&conn_id->handler_mutex);
+ cma_deref_id(conn_id);
out:
if (dev)
@@ -1951,20 +1969,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
-
- /*
- * Grab mutex to block rdma_destroy_id() from removing the device while
- * we're trying to acquire it.
- */
- mutex_lock(&lock);
- if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
- mutex_unlock(&lock);
+ if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
goto out;
- }
if (!status && !id_priv->cma_dev)
status = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (status) {
if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
@@ -2265,9 +2274,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err1;
- mutex_lock(&lock);
ret = cma_acquire_dev(id_priv);
- mutex_unlock(&lock);
if (ret)
goto err1;
}
@@ -2279,11 +2286,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
return 0;
err2:
- if (id_priv->cma_dev) {
- mutex_lock(&lock);
- cma_detach_from_dev(id_priv);
- mutex_unlock(&lock);
- }
+ if (id_priv->cma_dev)
+ cma_release_dev(id_priv);
err1:
cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
return ret;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8b00e6c46f01..b4d9e4caf3c9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -61,9 +61,9 @@ static char *states[] = {
NULL,
};
-static int dack_mode;
+static int dack_mode = 1;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
int c4iw_max_read_depth = 8;
module_param(c4iw_max_read_depth, int, 0644);
@@ -482,6 +482,7 @@ static int send_connect(struct c4iw_ep *ep)
TX_CHAN(ep->tx_chan) |
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
RCV_BUFSIZ(rcv_win>>10);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
@@ -1274,6 +1275,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
TX_CHAN(ep->tx_chan) |
SMAC_SEL(ep->smac_idx) |
DSCP(ep->tos) |
+ ULP_MODE(ULP_MODE_TCPDDP) |
RCV_BUFSIZ(rcv_win>>10);
opt2 = RX_CHANNEL(0) |
RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 54fbc1118abe..e29172c2afcb 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -87,17 +87,22 @@ static int dump_qp(int id, void *p, void *data)
return 1;
if (qp->ep)
- cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u "
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "qp sq id %u rq id %u state %u onchip %u "
"ep tid %u state %u %pI4:%u->%pI4:%u\n",
- qp->wq.sq.qid, (int)qp->attr.state,
+ qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP,
qp->ep->hwtid, (int)qp->ep->com.state,
&qp->ep->com.local_addr.sin_addr.s_addr,
ntohs(qp->ep->com.local_addr.sin_port),
&qp->ep->com.remote_addr.sin_addr.s_addr,
ntohs(qp->ep->com.remote_addr.sin_port));
else
- cc = snprintf(qpd->buf + qpd->pos, space, "qp id %u state %u\n",
- qp->wq.sq.qid, (int)qp->attr.state);
+ cc = snprintf(qpd->buf + qpd->pos, space,
+ "qp sq id %u rq id %u state %u onchip %u\n",
+ qp->wq.sq.qid, qp->wq.rq.qid,
+ (int)qp->attr.state,
+ qp->wq.sq.flags & T4_SQ_ONCHIP);
if (cc < space)
qpd->pos += cc;
return 0;
@@ -368,7 +373,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
static void c4iw_remove(struct c4iw_dev *dev)
{
PDBG("%s c4iw_dev %p\n", __func__, dev);
- cancel_delayed_work_sync(&dev->db_drop_task);
list_del(&dev->entry);
if (dev->registered)
c4iw_unregister_device(dev);
@@ -523,8 +527,16 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
case CXGB4_STATE_START_RECOVERY:
printk(KERN_INFO MOD "%s: Fatal Error\n",
pci_name(dev->rdev.lldi.pdev));
- if (dev->registered)
+ dev->rdev.flags |= T4_FATAL_ERROR;
+ if (dev->registered) {
+ struct ib_event event;
+
+ memset(&event, 0, sizeof event);
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &dev->ibdev;
+ ib_dispatch_event(&event);
c4iw_unregister_device(dev);
+ }
break;
case CXGB4_STATE_DETACH:
printk(KERN_INFO MOD "%s: Detach\n",
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 2fe19ec9ba60..9f6166f59268 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -176,7 +176,6 @@ struct c4iw_dev {
struct idr mmidr;
spinlock_t lock;
struct list_head entry;
- struct delayed_work db_drop_task;
struct dentry *debugfs_root;
u8 registered;
};
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 4f0be25cab1a..70a5a3c646da 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,9 +31,9 @@
*/
#include "iw_cxgb4.h"
-static int ocqp_support;
+static int ocqp_support = 1;
module_param(ocqp_support, int, 0644);
-MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=0)");
+MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
{
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 70004425d695..24af12fc8228 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -507,8 +507,14 @@ static inline void t4_swcq_consume(struct t4_cq *cq)
static inline void t4_hwcq_consume(struct t4_cq *cq)
{
cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
- if (++cq->cidx_inc == cq->size)
+ if (++cq->cidx_inc == (cq->size >> 4)) {
+ u32 val;
+
+ val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
+ INGRESSQID(cq->cqid);
+ writel(val, cq->gts);
cq->cidx_inc = 0;
+ }
if (++cq->cidx == cq->size) {
cq->cidx = 0;
cq->gen ^= 1;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index b8cb2f145ae4..8991677e9a08 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -557,6 +557,7 @@ static ssize_t store_reset(struct device *dev,
dev_info(dev,"Unit %d is disabled, can't reset\n",
dd->ipath_unit);
ret = -EINVAL;
+ goto bail;
}
ret = ipath_reset_device(dd->ipath_unit);
bail:
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 8b606fd64022..08c194861af5 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2610,9 +2610,11 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
netif_carrier_on(nesvnic->netdev);
spin_lock(&nesvnic->port_ibevent_lock);
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 0) {
+ nesdev->iw_status = 1;
+ nes_port_ibevent(nesvnic);
+ }
}
spin_unlock(&nesvnic->port_ibevent_lock);
}
@@ -2642,9 +2644,11 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
netif_carrier_off(nesvnic->netdev);
spin_lock(&nesvnic->port_ibevent_lock);
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 1) {
+ nesdev->iw_status = 0;
+ nes_port_ibevent(nesvnic);
+ }
}
spin_unlock(&nesvnic->port_ibevent_lock);
}
@@ -2703,9 +2707,11 @@ void nes_recheck_link_status(struct work_struct *work)
netif_carrier_on(nesvnic->netdev);
spin_lock(&nesvnic->port_ibevent_lock);
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 0) {
+ nesdev->iw_status = 1;
+ nes_port_ibevent(nesvnic);
+ }
}
spin_unlock(&nesvnic->port_ibevent_lock);
}
@@ -2723,9 +2729,11 @@ void nes_recheck_link_status(struct work_struct *work)
netif_carrier_off(nesvnic->netdev);
spin_lock(&nesvnic->port_ibevent_lock);
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
+ if (nesvnic->of_device_registered) {
+ if (nesdev->iw_status == 1) {
+ nesdev->iw_status = 0;
+ nes_port_ibevent(nesvnic);
+ }
}
spin_unlock(&nesvnic->port_ibevent_lock);
}
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index b01809a82cb0..4a2d21e15a70 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -5582,9 +5582,16 @@ static void qsfp_7322_event(struct work_struct *work)
* even on failure to read cable information. We don't
* get here for QME, so IS_QME check not needed here.
*/
- le2 = (!ret && qd->cache.atten[1] >= qib_long_atten &&
- !ppd->dd->cspec->r1 && QSFP_IS_CU(qd->cache.tech)) ?
- LE2_5m : LE2_DEFAULT;
+ if (!ret && !ppd->dd->cspec->r1) {
+ if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
+ le2 = LE2_QME;
+ else if (qd->cache.atten[1] >= qib_long_atten &&
+ QSFP_IS_CU(qd->cache.tech))
+ le2 = LE2_5m;
+ else
+ le2 = LE2_DEFAULT;
+ } else
+ le2 = LE2_DEFAULT;
ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
init_txdds_table(ppd, 0);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 5ad224e4a38b..8fd3df5bf04d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -464,8 +464,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
memset(smp->data, 0, sizeof(smp->data));
/* Only return the mkey if the protection field allows it. */
- if (smp->method == IB_MGMT_METHOD_SET || ibp->mkey == smp->mkey ||
- ibp->mkeyprot == 0)
+ if (!(smp->method == IB_MGMT_METHOD_GET &&
+ ibp->mkey != smp->mkey &&
+ ibp->mkeyprot == 1))
pip->mkey = ibp->mkey;
pip->gid_prefix = ibp->gid_prefix;
lid = ppd->lid;
@@ -705,7 +706,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
lwe = pip->link_width_enabled;
if (lwe) {
if (lwe == 0xFF)
- lwe = ppd->link_width_supported;
+ set_link_width_enabled(ppd, ppd->link_width_supported);
else if (lwe >= 16 || (lwe & ~ppd->link_width_supported))
smp->status |= IB_SMP_INVALID_FIELD;
else if (lwe != ppd->link_width_enabled)
@@ -720,7 +721,8 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
* speeds.
*/
if (lse == 15)
- lse = ppd->link_speed_supported;
+ set_link_speed_enabled(ppd,
+ ppd->link_speed_supported);
else if (lse >= 8 || (lse & ~ppd->link_speed_supported))
smp->status |= IB_SMP_INVALID_FIELD;
else if (lse != ppd->link_speed_enabled)
@@ -849,7 +851,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
if (clientrereg)
pip->clientrereg_resv_subnetto |= 0x80;
- goto done;
+ goto get_only;
err:
smp->status |= IB_SMP_INVALID_FIELD;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 19b527bafd57..c109bbdc90ac 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -79,6 +79,8 @@
extern const char *const qib_qsfp_devtech[16];
/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Active Equalization includes fiber, copper full EQ, and copper far Eq */
+#define QSFP_IS_ACTIVE_FAR(tech) ((0x32FF >> ((tech) >> 4)) & 1)
/* Attenuation should be valid for copper other than full/near Eq */
#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
/* Length is only valid if technology is "copper" */
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 8245237b67ce..eca0c41f1226 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1005,7 +1005,8 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
* there are still requests that haven't been acked.
*/
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
- !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)))
+ !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
+ (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
start_timer(qp);
while (qp->s_last != qp->s_acked) {
@@ -1439,6 +1440,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
}
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto ack_done;
/* Ignore invalid responses. */
if (qib_cmp24(psn, qp->s_next_psn) >= 0)
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 23cf8fc933ec..5b8f59d6c3e8 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -360,7 +360,7 @@ static int gameport_queue_event(void *object, struct module *owner,
event->owner = owner;
list_add_tail(&event->node, &gameport_event_list);
- schedule_work(&gameport_event_work);
+ queue_work(system_long_wq, &gameport_event_work);
out:
spin_unlock_irqrestore(&gameport_event_lock, flags);
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index ac471b77c18e..99ce9032d08c 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -71,8 +71,9 @@ struct tegra_kbc {
spinlock_t lock;
unsigned int repoll_dly;
unsigned long cp_dly_jiffies;
+ bool use_fn_map;
const struct tegra_kbc_platform_data *pdata;
- unsigned short keycode[KBC_MAX_KEY];
+ unsigned short keycode[KBC_MAX_KEY * 2];
unsigned short current_keys[KBC_MAX_KPENT];
unsigned int num_pressed_keys;
struct timer_list timer;
@@ -178,6 +179,40 @@ static const u32 tegra_kbc_default_keymap[] = {
KEY(15, 5, KEY_F2),
KEY(15, 6, KEY_CAPSLOCK),
KEY(15, 7, KEY_F6),
+
+ /* Software Handled Function Keys */
+ KEY(20, 0, KEY_KP7),
+
+ KEY(21, 0, KEY_KP9),
+ KEY(21, 1, KEY_KP8),
+ KEY(21, 2, KEY_KP4),
+ KEY(21, 4, KEY_KP1),
+
+ KEY(22, 1, KEY_KPSLASH),
+ KEY(22, 2, KEY_KP6),
+ KEY(22, 3, KEY_KP5),
+ KEY(22, 4, KEY_KP3),
+ KEY(22, 5, KEY_KP2),
+ KEY(22, 7, KEY_KP0),
+
+ KEY(27, 1, KEY_KPASTERISK),
+ KEY(27, 3, KEY_KPMINUS),
+ KEY(27, 4, KEY_KPPLUS),
+ KEY(27, 5, KEY_KPDOT),
+
+ KEY(28, 5, KEY_VOLUMEUP),
+
+ KEY(29, 3, KEY_HOME),
+ KEY(29, 4, KEY_END),
+ KEY(29, 5, KEY_BRIGHTNESSDOWN),
+ KEY(29, 6, KEY_VOLUMEDOWN),
+ KEY(29, 7, KEY_BRIGHTNESSUP),
+
+ KEY(30, 0, KEY_NUMLOCK),
+ KEY(30, 1, KEY_SCROLLLOCK),
+ KEY(30, 2, KEY_MUTE),
+
+ KEY(31, 4, KEY_HELP),
};
static const struct matrix_keymap_data tegra_kbc_default_keymap_data = {
@@ -224,6 +259,7 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
unsigned int i;
unsigned int num_down = 0;
unsigned long flags;
+ bool fn_keypress = false;
spin_lock_irqsave(&kbc->lock, flags);
for (i = 0; i < KBC_MAX_KPENT; i++) {
@@ -237,11 +273,28 @@ static void tegra_kbc_report_keys(struct tegra_kbc *kbc)
MATRIX_SCAN_CODE(row, col, KBC_ROW_SHIFT);
scancodes[num_down] = scancode;
- keycodes[num_down++] = kbc->keycode[scancode];
+ keycodes[num_down] = kbc->keycode[scancode];
+ /* If driver uses Fn map, do not report the Fn key. */
+ if ((keycodes[num_down] == KEY_FN) && kbc->use_fn_map)
+ fn_keypress = true;
+ else
+ num_down++;
}
val >>= 8;
}
+
+ /*
+ * If the platform uses Fn keymaps, translate keys on a Fn keypress.
+ * Function keycodes are KBC_MAX_KEY apart from the plain keycodes.
+ */
+ if (fn_keypress) {
+ for (i = 0; i < num_down; i++) {
+ scancodes[i] += KBC_MAX_KEY;
+ keycodes[i] = kbc->keycode[scancodes[i]];
+ }
+ }
+
spin_unlock_irqrestore(&kbc->lock, flags);
tegra_kbc_report_released_keys(kbc->idev,
@@ -594,8 +647,11 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev)
input_dev->keycode = kbc->keycode;
input_dev->keycodesize = sizeof(kbc->keycode[0]);
- input_dev->keycodemax = ARRAY_SIZE(kbc->keycode);
+ input_dev->keycodemax = KBC_MAX_KEY;
+ if (pdata->use_fn_map)
+ input_dev->keycodemax *= 2;
+ kbc->use_fn_map = pdata->use_fn_map;
keymap_data = pdata->keymap_data ?: &tegra_kbc_default_keymap_data;
matrix_keypad_build_keymap(keymap_data, KBC_ROW_SHIFT,
input_dev->keycode, input_dev->keybit);
diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h
index 25e5d042a72c..7453938bf5ef 100644
--- a/drivers/input/mouse/synaptics.h
+++ b/drivers/input/mouse/synaptics.h
@@ -51,6 +51,29 @@
#define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20)
#define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12)
#define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16)
+
+/*
+ * The following describes response for the 0x0c query.
+ *
+ * byte mask name meaning
+ * ---- ---- ------- ------------
+ * 1 0x01 adjustable threshold capacitive button sensitivity
+ * can be adjusted
+ * 1 0x02 report max query 0x0d gives max coord reported
+ * 1 0x04 clearpad sensor is ClearPad product
+ * 1 0x08 advanced gesture not particularly meaningful
+ * 1 0x10 clickpad bit 0 1-button ClickPad
+ * 1 0x60 multifinger mode identifies firmware finger counting
+ * (not reporting!) algorithm.
+ * Not particularly meaningful
+ * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered
+ * 2 0x01 clickpad bit 1 2-button ClickPad
+ * 2 0x02 deluxe LED controls touchpad support LED commands
+ * ala multimedia control bar
+ * 2 0x04 reduced filtering firmware does less filtering on
+ * position data, driver should watch
+ * for noise.
+ */
#define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */
#define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */
#define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000)
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index 7c38d1fbabf2..ba70058e2be3 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -299,7 +299,7 @@ static int serio_queue_event(void *object, struct module *owner,
event->owner = owner;
list_add_tail(&event->node, &serio_event_list);
- schedule_work(&serio_event_work);
+ queue_work(system_long_wq, &serio_event_work);
out:
spin_unlock_irqrestore(&serio_event_lock, flags);
diff --git a/drivers/input/touchscreen/tps6507x-ts.c b/drivers/input/touchscreen/tps6507x-ts.c
index c8c136cf7bbc..43031492d733 100644
--- a/drivers/input/touchscreen/tps6507x-ts.c
+++ b/drivers/input/touchscreen/tps6507x-ts.c
@@ -43,7 +43,6 @@ struct tps6507x_ts {
struct input_dev *input_dev;
struct device *dev;
char phys[32];
- struct workqueue_struct *wq;
struct delayed_work work;
unsigned polling; /* polling is active */
struct ts_event tc;
@@ -220,8 +219,8 @@ done:
poll = 1;
if (poll) {
- schd = queue_delayed_work(tsc->wq, &tsc->work,
- msecs_to_jiffies(tsc->poll_period));
+ schd = schedule_delayed_work(&tsc->work,
+ msecs_to_jiffies(tsc->poll_period));
if (schd)
tsc->polling = 1;
else {
@@ -303,7 +302,6 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
tsc->input_dev = input_dev;
INIT_DELAYED_WORK(&tsc->work, tps6507x_ts_handler);
- tsc->wq = create_workqueue("TPS6507x Touchscreen");
if (init_data) {
tsc->poll_period = init_data->poll_period;
@@ -325,8 +323,8 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
if (error)
goto err2;
- schd = queue_delayed_work(tsc->wq, &tsc->work,
- msecs_to_jiffies(tsc->poll_period));
+ schd = schedule_delayed_work(&tsc->work,
+ msecs_to_jiffies(tsc->poll_period));
if (schd)
tsc->polling = 1;
@@ -341,7 +339,6 @@ static int tps6507x_ts_probe(struct platform_device *pdev)
err2:
cancel_delayed_work_sync(&tsc->work);
- destroy_workqueue(tsc->wq);
input_free_device(input_dev);
err1:
kfree(tsc);
@@ -357,7 +354,6 @@ static int __devexit tps6507x_ts_remove(struct platform_device *pdev)
struct input_dev *input_dev = tsc->input_dev;
cancel_delayed_work_sync(&tsc->work);
- destroy_workqueue(tsc->wq);
input_unregister_device(input_dev);
diff --git a/drivers/isdn/hardware/eicon/istream.c b/drivers/isdn/hardware/eicon/istream.c
index 18f8798442fa..7bd5baa547be 100644
--- a/drivers/isdn/hardware/eicon/istream.c
+++ b/drivers/isdn/hardware/eicon/istream.c
@@ -62,7 +62,7 @@ void diva_xdi_provide_istream_info (ADAPTER* a,
stream interface.
If synchronous service was requested, then function
does return amount of data written to stream.
- 'final' does indicate that pice of data to be written is
+ 'final' does indicate that piece of data to be written is
final part of frame (necessary only by structured datatransfer)
return 0 if zero lengh packet was written
return -1 if stream is full
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index 0858791978d8..cfff0c41d298 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -1247,10 +1247,10 @@ static void
l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
{
struct PStack *st = fi->userdata;
- struct sk_buff *skb, *oskb;
+ struct sk_buff *skb;
struct Layer2 *l2 = &st->l2;
u_char header[MAX_HEADER_LEN];
- int i;
+ int i, hdr_space_needed;
int unsigned p1;
u_long flags;
@@ -1261,6 +1261,16 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
if (!skb)
return;
+ hdr_space_needed = l2headersize(l2, 0);
+ if (hdr_space_needed > skb_headroom(skb)) {
+ struct sk_buff *orig_skb = skb;
+
+ skb = skb_realloc_headroom(skb, hdr_space_needed);
+ if (!skb) {
+ dev_kfree_skb(orig_skb);
+ return;
+ }
+ }
spin_lock_irqsave(&l2->lock, flags);
if(test_bit(FLG_MOD128, &l2->flag))
p1 = (l2->vs - l2->va) % 128;
@@ -1285,19 +1295,7 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
l2->vs = (l2->vs + 1) % 8;
}
spin_unlock_irqrestore(&l2->lock, flags);
- p1 = skb->data - skb->head;
- if (p1 >= i)
- memcpy(skb_push(skb, i), header, i);
- else {
- printk(KERN_WARNING
- "isdl2 pull_iqueue skb header(%d/%d) too short\n", i, p1);
- oskb = skb;
- skb = alloc_skb(oskb->len + i, GFP_ATOMIC);
- memcpy(skb_put(skb, i), header, i);
- skb_copy_from_linear_data(oskb,
- skb_put(skb, oskb->len), oskb->len);
- dev_kfree_skb(oskb);
- }
+ memcpy(skb_push(skb, i), header, i);
st->l2.l2l1(st, PH_PULL | INDICATION, skb);
test_and_clear_bit(FLG_ACK_PEND, &st->l2.flag);
if (!test_and_set_bit(FLG_T200_RUN, &st->l2.flag)) {
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 8a2f767f26d8..0ed7f6bc2a7f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);
if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0cc30ecda4c1..d5ad7723b172 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -553,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit)
{
mddev_t *mddev, *new = NULL;
+ if (unit && MAJOR(unit) != MD_MAJOR)
+ unit &= ~((1<<MdpMinorShift)-1);
+
retry:
spin_lock(&all_mddevs_lock);
@@ -4138,10 +4141,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
}
mddev->array_sectors = sectors;
- set_capacity(mddev->gendisk, mddev->array_sectors);
- if (mddev->pers)
+ if (mddev->pers) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
-
+ }
return len;
}
@@ -4624,6 +4627,7 @@ static int do_md_run(mddev_t *mddev)
}
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
+ mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
out:
return err;
@@ -4712,6 +4716,7 @@ static void md_clean(mddev_t *mddev)
mddev->sync_speed_min = mddev->sync_speed_max = 0;
mddev->recovery = 0;
mddev->in_sync = 0;
+ mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
mddev->bitmap_info.offset = 0;
@@ -4827,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
set_capacity(disk, 0);
mutex_unlock(&mddev->open_mutex);
+ mddev->changed = 1;
revalidate_disk(disk);
if (mddev->ro)
@@ -6011,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode)
atomic_inc(&mddev->openers);
mutex_unlock(&mddev->open_mutex);
- check_disk_size_change(mddev->gendisk, bdev);
+ check_disk_change(bdev);
out:
return err;
}
@@ -6026,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode)
return 0;
}
+
+static int md_media_changed(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ return mddev->changed;
+}
+
+static int md_revalidate(struct gendisk *disk)
+{
+ mddev_t *mddev = disk->private_data;
+
+ mddev->changed = 0;
+ return 0;
+}
static const struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -6036,6 +6057,8 @@ static const struct block_device_operations md_fops =
.compat_ioctl = md_compat_ioctl,
#endif
.getgeo = md_getgeo,
+ .media_changed = md_media_changed,
+ .revalidate_disk= md_revalidate,
};
static int md_thread(void * arg)
@@ -7338,7 +7361,7 @@ static int __init md_init(void)
{
int ret = -ENOMEM;
- md_wq = alloc_workqueue("md", WQ_RESCUER, 0);
+ md_wq = alloc_workqueue("md", WQ_MEM_RECLAIM, 0);
if (!md_wq)
goto err_wq;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 7e90b8593b2a..12215d437fcc 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -274,6 +274,8 @@ struct mddev_s
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
+ int changed; /* True if we might need to
+ * reread partition info */
int degraded; /* whether md should consider
* adding a spare
*/
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 6d7ddf32ef2e..3a62d440e27b 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev)
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
*/
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 637a96855edb..c0ac457f1218 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -361,7 +361,6 @@ static int raid0_run(mddev_t *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
- mddev->queue->queue_lock = &mddev->queue->__queue_lock;
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
@@ -670,6 +669,7 @@ static void *raid0_takeover_raid1(mddev_t *mddev)
mddev->new_layout = 0;
mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
mddev->delta_disks = 1 - mddev->raid_disks;
+ mddev->raid_disks = 1;
/* make sure it will be not marked as dirty */
mddev->recovery_cp = MaxSector;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a23ffa397ba9..06cd712807d0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf)
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
+ /* Only take the spinlock to quiet a warning */
+ spin_lock(conf->mddev->queue->queue_lock);
blk_remove_plug(conf->mddev->queue);
+ spin_unlock(conf->mddev->queue->queue_lock);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to
* disk before proceeding w/ I/O */
@@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r1_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
+ blk_plug_device_unlocked(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
@@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- mddev->queue->queue_lock = &conf->device_lock;
list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3b607b28741b..747d061d8e05 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf)
if (conf->pending_bio_list.head) {
struct bio *bio;
bio = bio_list_get(&conf->pending_bio_list);
+ /* Spinlock only taken to quiet a warning */
+ spin_lock(conf->mddev->queue->queue_lock);
blk_remove_plug(conf->mddev->queue);
+ spin_unlock(conf->mddev->queue->queue_lock);
spin_unlock_irq(&conf->device_lock);
/* flush any pending bitmap writes to disk
* before proceeding w/ I/O */
@@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
atomic_inc(&r10_bio->remaining);
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
- blk_plug_device(mddev->queue);
+ blk_plug_device_unlocked(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
}
@@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev)
if (!conf)
goto out;
- mddev->queue->queue_lock = &conf->device_lock;
-
mddev->thread = conf->thread;
conf->thread = NULL;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 702812824195..78536fdbd87f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev)
mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested;
- mddev->queue->queue_lock = &conf->device_lock;
mddev->queue->unplug_fn = raid5_unplug_queue;
chunk_size = mddev->chunk_sectors << 9;
diff --git a/drivers/media/common/tuners/tda8290.c b/drivers/media/common/tuners/tda8290.c
index bc6a67768af1..8c4852114eeb 100644
--- a/drivers/media/common/tuners/tda8290.c
+++ b/drivers/media/common/tuners/tda8290.c
@@ -658,13 +658,13 @@ static int tda8290_probe(struct tuner_i2c_props *i2c_props)
#define TDA8290_ID 0x89
u8 reg = 0x1f, id;
struct i2c_msg msg_read[] = {
- { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg },
- { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id },
+ { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
+ { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
};
/* detect tda8290 */
if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
- printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n",
+ printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
__func__, reg);
return -ENODEV;
}
@@ -685,13 +685,13 @@ static int tda8295_probe(struct tuner_i2c_props *i2c_props)
#define TDA8295C2_ID 0x8b
u8 reg = 0x2f, id;
struct i2c_msg msg_read[] = {
- { .addr = 0x4b, .flags = 0, .len = 1, .buf = &reg },
- { .addr = 0x4b, .flags = I2C_M_RD, .len = 1, .buf = &id },
+ { .addr = i2c_props->addr, .flags = 0, .len = 1, .buf = &reg },
+ { .addr = i2c_props->addr, .flags = I2C_M_RD, .len = 1, .buf = &id },
};
- /* detect tda8290 */
+ /* detect tda8295 */
if (i2c_transfer(i2c_props->adap, msg_read, 2) != 2) {
- printk(KERN_WARNING "%s: tda8290 couldn't read register 0x%02x\n",
+ printk(KERN_WARNING "%s: couldn't read register 0x%02x\n",
__func__, reg);
return -ENODEV;
}
diff --git a/drivers/media/dvb/dvb-usb/dib0700_devices.c b/drivers/media/dvb/dvb-usb/dib0700_devices.c
index defd83964ce2..193cdb77b76a 100644
--- a/drivers/media/dvb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/dvb/dvb-usb/dib0700_devices.c
@@ -870,6 +870,23 @@ static int dib7070p_tuner_attach(struct dvb_usb_adapter *adap)
return 0;
}
+static int stk7700p_pid_filter(struct dvb_usb_adapter *adapter, int index,
+ u16 pid, int onoff)
+{ /* PID-filter hook used by the stk7700p adapter entry: forwards to the dib7000p or dib7000m helper */
+ struct dib0700_state *st = adapter->dev->priv;
+ if (st->is_dib7000pc) /* flag is read from the device's private state; where it gets set is outside this hunk */
+ return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
+ return dib7000m_pid_filter(adapter->fe, index, pid, onoff); /* otherwise the dib7000m helper; its return value is passed through */
+}
+
+static int stk7700p_pid_filter_ctrl(struct dvb_usb_adapter *adapter, int onoff)
+{ /* PID-filter on/off hook used by the stk7700p adapter entry; same demod dispatch as stk7700p_pid_filter */
+ struct dib0700_state *st = adapter->dev->priv;
+ if (st->is_dib7000pc) /* selects the dib7000p helper when set */
+ return dib7000p_pid_filter_ctrl(adapter->fe, onoff);
+ return dib7000m_pid_filter_ctrl(adapter->fe, onoff); /* int onoff is narrowed to the helper's u8 parameter */
+}
+
static int stk70x0p_pid_filter(struct dvb_usb_adapter *adapter, int index, u16 pid, int onoff)
{
return dib7000p_pid_filter(adapter->fe, index, pid, onoff);
@@ -1875,8 +1892,8 @@ struct dvb_usb_device_properties dib0700_devices[] = {
{
.caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF,
.pid_filter_count = 32,
- .pid_filter = stk70x0p_pid_filter,
- .pid_filter_ctrl = stk70x0p_pid_filter_ctrl,
+ .pid_filter = stk7700p_pid_filter,
+ .pid_filter_ctrl = stk7700p_pid_filter_ctrl,
.frontend_attach = stk7700p_frontend_attach,
.tuner_attach = stk7700p_tuner_attach,
diff --git a/drivers/media/dvb/dvb-usb/lmedm04.c b/drivers/media/dvb/dvb-usb/lmedm04.c
index 9eea4188303b..46ccd01a7696 100644
--- a/drivers/media/dvb/dvb-usb/lmedm04.c
+++ b/drivers/media/dvb/dvb-usb/lmedm04.c
@@ -659,7 +659,7 @@ static int lme2510_download_firmware(struct usb_device *dev,
}
/* Default firmware for LME2510C */
-const char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
+char lme_firmware[50] = "dvb-usb-lme2510c-s7395.fw";
static void lme_coldreset(struct usb_device *dev)
{
@@ -1006,7 +1006,7 @@ static struct dvb_usb_device_properties lme2510c_properties = {
.caps = DVB_USB_IS_AN_I2C_ADAPTER,
.usb_ctrl = DEVICE_SPECIFIC,
.download_firmware = lme2510_download_firmware,
- .firmware = lme_firmware,
+ .firmware = (const char *)&lme_firmware,
.size_of_priv = sizeof(struct lme2510_state),
.num_adapters = 1,
.adapter = {
@@ -1109,5 +1109,5 @@ module_exit(lme2510_module_exit);
MODULE_AUTHOR("Malcolm Priestley <tvboxspy@gmail.com>");
MODULE_DESCRIPTION("LME2510(C) DVB-S USB2.0");
-MODULE_VERSION("1.74");
+MODULE_VERSION("1.75");
MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb/frontends/dib7000m.c b/drivers/media/dvb/frontends/dib7000m.c
index c7f5ccf54aa5..289a79837f24 100644
--- a/drivers/media/dvb/frontends/dib7000m.c
+++ b/drivers/media/dvb/frontends/dib7000m.c
@@ -1285,6 +1285,25 @@ struct i2c_adapter * dib7000m_get_i2c_master(struct dvb_frontend *demod, enum di
}
EXPORT_SYMBOL(dib7000m_get_i2c_master);
+int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff)
+{ /* Read-modify-write of register (294 + reg_offs): bit 4 is replaced by the low bit of onoff */
+ struct dib7000m_state *state = fe->demodulator_priv;
+ u16 val = dib7000m_read_word(state, 294 + state->reg_offs) & 0xffef; /* 0xffef clears bit 4 and keeps every other bit */
+ val |= (onoff & 0x1) << 4; /* only bit 0 of onoff is used */
+ dprintk("PID filter enabled %d", onoff);
+ return dib7000m_write_word(state, 294 + state->reg_offs, val); /* returns the write helper's result unchanged */
+}
+EXPORT_SYMBOL(dib7000m_pid_filter_ctrl);
+
+int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id, u16 pid, u8 onoff)
+{ /* Program one PID-filter slot: slot 'id' is written at register (300 + reg_offs + id) */
+ struct dib7000m_state *state = fe->demodulator_priv;
+ dprintk("PID filter: index %x, PID %d, OnOff %d", id, pid, onoff);
+ return dib7000m_write_word(state, 300 + state->reg_offs + id,
+ onoff ? (1 << 13) | pid : 0); /* on: bit 13 OR'ed with pid (pid is not masked here; presumably callers pass at most 13 bits -- confirm); off: writes 0 */
+}
+EXPORT_SYMBOL(dib7000m_pid_filter);
+
#if 0
/* used with some prototype boards */
int dib7000m_i2c_enumeration(struct i2c_adapter *i2c, int no_of_demods,
diff --git a/drivers/media/dvb/frontends/dib7000m.h b/drivers/media/dvb/frontends/dib7000m.h
index 113819ce9f0d..81fcf2241c64 100644
--- a/drivers/media/dvb/frontends/dib7000m.h
+++ b/drivers/media/dvb/frontends/dib7000m.h
@@ -46,6 +46,8 @@ extern struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
extern struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *,
enum dibx000_i2c_interface,
int);
+extern int dib7000m_pid_filter(struct dvb_frontend *, u8 id, u16 pid, u8 onoff);
+extern int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe, u8 onoff);
#else
static inline
struct dvb_frontend *dib7000m_attach(struct i2c_adapter *i2c_adap,
@@ -63,6 +65,19 @@ struct i2c_adapter *dib7000m_get_i2c_master(struct dvb_frontend *demod,
printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
return NULL;
}
+static inline int dib7000m_pid_filter(struct dvb_frontend *fe, u8 id,
+ u16 pid, u8 onoff)
+{ /* stub on the #else side of this header: logs a warning and fails; all arguments are ignored */
+ printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
+ return -ENODEV;
+}
+
+static inline int dib7000m_pid_filter_ctrl(struct dvb_frontend *fe,
+ uint8_t onoff) /* NOTE(review): the extern prototype in this header spells the type u8; presumably the same type -- confirm */
+{ /* stub on the #else side of this header: logs a warning and fails; all arguments are ignored */
+ printk(KERN_WARNING "%s: driver disabled by Kconfig\n", __func__);
+ return -ENODEV;
+}
#endif
/* TODO
diff --git a/drivers/media/dvb/mantis/mantis_pci.c b/drivers/media/dvb/mantis/mantis_pci.c
index 59feeb84aec7..10a432a79d00 100644
--- a/drivers/media/dvb/mantis/mantis_pci.c
+++ b/drivers/media/dvb/mantis/mantis_pci.c
@@ -22,7 +22,6 @@
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
diff --git a/drivers/media/rc/ir-raw.c b/drivers/media/rc/ir-raw.c
index 73230ff93b8a..01f258a2a57a 100644
--- a/drivers/media/rc/ir-raw.c
+++ b/drivers/media/rc/ir-raw.c
@@ -112,7 +112,7 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
{
ktime_t now;
s64 delta; /* ns */
- struct ir_raw_event ev;
+ DEFINE_IR_RAW_EVENT(ev);
int rc = 0;
if (!dev->raw)
@@ -125,7 +125,6 @@ int ir_raw_event_store_edge(struct rc_dev *dev, enum raw_event_type type)
* being called for the first time, note that delta can't
* possibly be negative.
*/
- ev.duration = 0;
if (delta > IR_MAX_DURATION || !dev->raw->last_type)
type |= IR_START_EVENT;
else
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c
index 6df0a4980645..e4f8eac7f717 100644
--- a/drivers/media/rc/mceusb.c
+++ b/drivers/media/rc/mceusb.c
@@ -148,6 +148,7 @@ enum mceusb_model_type {
MCE_GEN2_TX_INV,
POLARIS_EVK,
CX_HYBRID_TV,
+ MULTIFUNCTION,
};
struct mceusb_model {
@@ -155,9 +156,10 @@ struct mceusb_model {
u32 mce_gen2:1;
u32 mce_gen3:1;
u32 tx_mask_normal:1;
- u32 is_polaris:1;
u32 no_tx:1;
+ int ir_intfnum;
+
const char *rc_map; /* Allow specify a per-board map */
const char *name; /* per-board name */
};
@@ -179,7 +181,6 @@ static const struct mceusb_model mceusb_model[] = {
.tx_mask_normal = 1,
},
[POLARIS_EVK] = {
- .is_polaris = 1,
/*
* In fact, the EVK is shipped without
* remotes, but we should have something handy,
@@ -189,10 +190,13 @@ static const struct mceusb_model mceusb_model[] = {
.name = "Conexant Hybrid TV (cx231xx) MCE IR",
},
[CX_HYBRID_TV] = {
- .is_polaris = 1,
.no_tx = 1, /* tx isn't wired up at all */
.name = "Conexant Hybrid TV (cx231xx) MCE IR",
},
+ [MULTIFUNCTION] = {
+ .mce_gen2 = 1,
+ .ir_intfnum = 2,
+ },
};
static struct usb_device_id mceusb_dev_table[] = {
@@ -216,8 +220,9 @@ static struct usb_device_id mceusb_dev_table[] = {
{ USB_DEVICE(VENDOR_PHILIPS, 0x206c) },
/* Philips/Spinel plus IR transceiver for ASUS */
{ USB_DEVICE(VENDOR_PHILIPS, 0x2088) },
- /* Realtek MCE IR Receiver */
- { USB_DEVICE(VENDOR_REALTEK, 0x0161) },
+ /* Realtek MCE IR Receiver and card reader */
+ { USB_DEVICE(VENDOR_REALTEK, 0x0161),
+ .driver_info = MULTIFUNCTION },
/* SMK/Toshiba G83C0004D410 */
{ USB_DEVICE(VENDOR_SMK, 0x031d),
.driver_info = MCE_GEN2_TX_INV },
@@ -1101,7 +1106,7 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
bool is_gen3;
bool is_microsoft_gen1;
bool tx_mask_normal;
- bool is_polaris;
+ int ir_intfnum;
dev_dbg(&intf->dev, "%s called\n", __func__);
@@ -1110,13 +1115,11 @@ static int __devinit mceusb_dev_probe(struct usb_interface *intf,
is_gen3 = mceusb_model[model].mce_gen3;
is_microsoft_gen1 = mceusb_model[model].mce_gen1;
tx_mask_normal = mceusb_model[model].tx_mask_normal;
- is_polaris = mceusb_model[model].is_polaris;
+ ir_intfnum = mceusb_model[model].ir_intfnum;
- if (is_polaris) {
- /* Interface 0 is IR */
- if (idesc->desc.bInterfaceNumber)
- return -ENODEV;
- }
+ /* There are multi-function devices with non-IR interfaces */
+ if (idesc->desc.bInterfaceNumber != ir_intfnum)
+ return -ENODEV;
/* step through the endpoints to find first bulk in and out endpoint */
for (i = 0; i < idesc->desc.bNumEndpoints; ++i) {
diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c
index 273d9d674792..d4d64492a057 100644
--- a/drivers/media/rc/nuvoton-cir.c
+++ b/drivers/media/rc/nuvoton-cir.c
@@ -385,8 +385,9 @@ static void nvt_cir_regs_init(struct nvt_dev *nvt)
static void nvt_cir_wake_regs_init(struct nvt_dev *nvt)
{
- /* set number of bytes needed for wake key comparison (default 67) */
- nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_LEN, CIR_WAKE_FIFO_CMP_DEEP);
+ /* set number of bytes needed for wake from s3 (default 65) */
+ nvt_cir_wake_reg_write(nvt, CIR_WAKE_FIFO_CMP_BYTES,
+ CIR_WAKE_FIFO_CMP_DEEP);
/* set tolerance/variance allowed per byte during wake compare */
nvt_cir_wake_reg_write(nvt, CIR_WAKE_CMP_TOLERANCE,
diff --git a/drivers/media/rc/nuvoton-cir.h b/drivers/media/rc/nuvoton-cir.h
index 1df82351cb03..048135eea702 100644
--- a/drivers/media/rc/nuvoton-cir.h
+++ b/drivers/media/rc/nuvoton-cir.h
@@ -305,8 +305,11 @@ struct nvt_dev {
#define CIR_WAKE_IRFIFOSTS_RX_EMPTY 0x20
#define CIR_WAKE_IRFIFOSTS_RX_FULL 0x10
-/* CIR Wake FIFO buffer is 67 bytes long */
-#define CIR_WAKE_FIFO_LEN 67
+/*
+ * The CIR Wake FIFO buffer is 67 bytes long, but the stock remote wakes
+ * the system comparing only 65 bytes (fails with this set to 67)
+ */
+#define CIR_WAKE_FIFO_CMP_BYTES 65
/* CIR Wake byte comparison tolerance */
#define CIR_WAKE_CMP_TOLERANCE 5
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 512a2f4ada0e..5b4422ef4e6d 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -850,7 +850,7 @@ static ssize_t store_protocols(struct device *device,
count++;
} else {
for (i = 0; i < ARRAY_SIZE(proto_names); i++) {
- if (!strncasecmp(tmp, proto_names[i].name, strlen(proto_names[i].name))) {
+ if (!strcasecmp(tmp, proto_names[i].name)) {
tmp += strlen(proto_names[i].name);
mask = proto_names[i].type;
break;
diff --git a/drivers/media/video/au0828/au0828-video.c b/drivers/media/video/au0828/au0828-video.c
index e41e4ad5cc40..9c475c600fc9 100644
--- a/drivers/media/video/au0828/au0828-video.c
+++ b/drivers/media/video/au0828/au0828-video.c
@@ -1758,7 +1758,12 @@ static int vidioc_reqbufs(struct file *file, void *priv,
if (rc < 0)
return rc;
- return videobuf_reqbufs(&fh->vb_vidq, rb);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_reqbufs(&fh->vb_vidq, rb);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_reqbufs(&fh->vb_vbiq, rb);
+
+ return rc;
}
static int vidioc_querybuf(struct file *file, void *priv,
@@ -1772,7 +1777,12 @@ static int vidioc_querybuf(struct file *file, void *priv,
if (rc < 0)
return rc;
- return videobuf_querybuf(&fh->vb_vidq, b);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_querybuf(&fh->vb_vidq, b);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_querybuf(&fh->vb_vbiq, b);
+
+ return rc;
}
static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1785,7 +1795,12 @@ static int vidioc_qbuf(struct file *file, void *priv, struct v4l2_buffer *b)
if (rc < 0)
return rc;
- return videobuf_qbuf(&fh->vb_vidq, b);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_qbuf(&fh->vb_vidq, b);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_qbuf(&fh->vb_vbiq, b);
+
+ return rc;
}
static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
@@ -1806,7 +1821,12 @@ static int vidioc_dqbuf(struct file *file, void *priv, struct v4l2_buffer *b)
dev->greenscreen_detected = 0;
}
- return videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
+ if (fh->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ rc = videobuf_dqbuf(&fh->vb_vidq, b, file->f_flags & O_NONBLOCK);
+ else if (fh->type == V4L2_BUF_TYPE_VBI_CAPTURE)
+ rc = videobuf_dqbuf(&fh->vb_vbiq, b, file->f_flags & O_NONBLOCK);
+
+ return rc;
}
static struct v4l2_file_operations au0828_v4l_fops = {
diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c
index 87177733cf92..68ad1963f421 100644
--- a/drivers/media/video/cx18/cx18-cards.c
+++ b/drivers/media/video/cx18/cx18-cards.c
@@ -95,6 +95,53 @@ static const struct cx18_card cx18_card_hvr1600_esmt = {
.i2c = &cx18_i2c_std,
};
+static const struct cx18_card cx18_card_hvr1600_s5h1411 = { /* card table entry for CX18_CARD_HVR_1600_S5H1411; also appended to cx18_card_list[] by this patch */
+ .type = CX18_CARD_HVR_1600_S5H1411,
+ .name = "Hauppauge HVR-1600",
+ .comment = "Simultaneous Digital and Analog TV capture supported\n",
+ .v4l2_capabilities = CX18_CAP_ENCODER,
+ .hw_audio_ctrl = CX18_HW_418_AV,
+ .hw_muxer = CX18_HW_CS5345,
+ .hw_all = CX18_HW_TVEEPROM | CX18_HW_418_AV | CX18_HW_TUNER |
+ CX18_HW_CS5345 | CX18_HW_DVB | CX18_HW_GPIO_RESET_CTRL |
+ CX18_HW_Z8F0811_IR_HAUP,
+ .video_inputs = { /* each row: input id, an index (0/1/2), and a CX18_AV_* source -- field meanings are defined outside this hunk */
+ { CX18_CARD_INPUT_VID_TUNER, 0, CX18_AV_COMPOSITE7 },
+ { CX18_CARD_INPUT_SVIDEO1, 1, CX18_AV_SVIDEO1 },
+ { CX18_CARD_INPUT_COMPOSITE1, 1, CX18_AV_COMPOSITE3 },
+ { CX18_CARD_INPUT_SVIDEO2, 2, CX18_AV_SVIDEO2 },
+ { CX18_CARD_INPUT_COMPOSITE2, 2, CX18_AV_COMPOSITE4 },
+ },
+ .audio_inputs = {
+ { CX18_CARD_INPUT_AUD_TUNER,
+ CX18_AV_AUDIO8, CS5345_IN_1 | CS5345_MCLK_1_5 },
+ { CX18_CARD_INPUT_LINE_IN1,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_2 },
+ { CX18_CARD_INPUT_LINE_IN2,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_3 },
+ },
+ .radio_input = { CX18_CARD_INPUT_AUD_TUNER,
+ CX18_AV_AUDIO_SERIAL1, CS5345_IN_4 },
+ .ddr = {
+ /* ESMT M13S128324A-5B memory */
+ .chip_config = 0x003,
+ .refresh = 0x30c,
+ .timing1 = 0x44220e82,
+ .timing2 = 0x08,
+ .tune_lane = 0,
+ .initial_emrs = 0,
+ },
+ .gpio_init.initial_value = 0x3001,
+ .gpio_init.direction = 0x3001, /* same bit mask as initial_value above and active_lo_mask below */
+ .gpio_i2c_slave_reset = {
+ .active_lo_mask = 0x3001,
+ .msecs_asserted = 10,
+ .msecs_recovery = 40,
+ .ir_reset_mask = 0x0001, /* bit 0 only, a subset of active_lo_mask */
+ },
+ .i2c = &cx18_i2c_std,
+};
+
static const struct cx18_card cx18_card_hvr1600_samsung = {
.type = CX18_CARD_HVR_1600_SAMSUNG,
.name = "Hauppauge HVR-1600 (Preproduction)",
@@ -523,7 +570,8 @@ static const struct cx18_card *cx18_card_list[] = {
&cx18_card_toshiba_qosmio_dvbt,
&cx18_card_leadtek_pvr2100,
&cx18_card_leadtek_dvr3100h,
- &cx18_card_gotview_dvd3
+ &cx18_card_gotview_dvd3,
+ &cx18_card_hvr1600_s5h1411
};
const struct cx18_card *cx18_get_card(u16 index)
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 944af8adbe0c..b1c3cbd92743 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -157,6 +157,7 @@ MODULE_PARM_DESC(cardtype,
"\t\t\t 7 = Leadtek WinFast PVR2100\n"
"\t\t\t 8 = Leadtek WinFast DVR3100 H\n"
"\t\t\t 9 = GoTView PCI DVD3 Hybrid\n"
+ "\t\t\t 10 = Hauppauge HVR 1600 (S5H1411)\n"
"\t\t\t 0 = Autodetect (default)\n"
"\t\t\t-1 = Ignore this card\n\t\t");
MODULE_PARM_DESC(pal, "Set PAL standard: B, G, H, D, K, I, M, N, Nc, 60");
@@ -337,6 +338,7 @@ void cx18_read_eeprom(struct cx18 *cx, struct tveeprom *tv)
switch (cx->card->type) {
case CX18_CARD_HVR_1600_ESMT:
case CX18_CARD_HVR_1600_SAMSUNG:
+ case CX18_CARD_HVR_1600_S5H1411:
tveeprom_hauppauge_analog(&c, tv, eedata);
break;
case CX18_CARD_YUAN_MPC718:
@@ -365,7 +367,25 @@ static void cx18_process_eeprom(struct cx18 *cx)
from the model number. Use the cardtype module option if you
have one of these preproduction models. */
switch (tv.model) {
- case 74000 ... 74999:
+ case 74301: /* Retail models */
+ case 74321:
+ case 74351: /* OEM models */
+ case 74361:
+ /* Digital side is s5h1411/tda18271 */
+ cx->card = cx18_get_card(CX18_CARD_HVR_1600_S5H1411);
+ break;
+ case 74021: /* Retail models */
+ case 74031:
+ case 74041:
+ case 74141:
+ case 74541: /* OEM models */
+ case 74551:
+ case 74591:
+ case 74651:
+ case 74691:
+ case 74751:
+ case 74891:
+ /* Digital side is s5h1409/mxl5005s */
cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
break;
case 0x718:
@@ -377,7 +397,8 @@ static void cx18_process_eeprom(struct cx18 *cx)
CX18_ERR("Invalid EEPROM\n");
return;
default:
- CX18_ERR("Unknown model %d, defaulting to HVR-1600\n", tv.model);
+ CX18_ERR("Unknown model %d, defaulting to original HVR-1600 "
+ "(cardtype=1)\n", tv.model);
cx->card = cx18_get_card(CX18_CARD_HVR_1600_ESMT);
break;
}
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 306caac6d3fc..f736679d2517 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -85,7 +85,8 @@
#define CX18_CARD_LEADTEK_PVR2100 6 /* Leadtek WinFast PVR2100 */
#define CX18_CARD_LEADTEK_DVR3100H 7 /* Leadtek WinFast DVR3100 H */
#define CX18_CARD_GOTVIEW_PCI_DVD3 8 /* GoTView PCI DVD3 Hybrid */
-#define CX18_CARD_LAST 8
+#define CX18_CARD_HVR_1600_S5H1411 9 /* Hauppauge HVR 1600 s5h1411/tda18271*/
+#define CX18_CARD_LAST 9
#define CX18_ENC_STREAM_TYPE_MPG 0
#define CX18_ENC_STREAM_TYPE_TS 1
diff --git a/drivers/media/video/cx18/cx18-dvb.c b/drivers/media/video/cx18/cx18-dvb.c
index f0381d62518d..f41922bd4020 100644
--- a/drivers/media/video/cx18/cx18-dvb.c
+++ b/drivers/media/video/cx18/cx18-dvb.c
@@ -29,6 +29,8 @@
#include "cx18-gpio.h"
#include "s5h1409.h"
#include "mxl5005s.h"
+#include "s5h1411.h"
+#include "tda18271.h"
#include "zl10353.h"
#include <linux/firmware.h>
@@ -77,6 +79,32 @@ static struct s5h1409_config hauppauge_hvr1600_config = {
};
/*
+ * CX18_CARD_HVR_1600_S5H1411
+ */
+static struct s5h1411_config hcw_s5h1411_config = { /* handed to s5h1411_attach() in dvb_register() for CX18_CARD_HVR_1600_S5H1411 */
+ .output_mode = S5H1411_SERIAL_OUTPUT,
+ .gpio = S5H1411_GPIO_OFF,
+ .vsb_if = S5H1411_IF_44000,
+ .qam_if = S5H1411_IF_4000,
+ .inversion = S5H1411_INVERSION_ON,
+ .status_mode = S5H1411_DEMODLOCKING,
+ .mpeg_timing = S5H1411_MPEGTIMING_CONTINOUS_NONINVERTING_CLOCK, /* "CONTINOUS" is the identifier as spelled in this source; presumably it matches the s5h1411 header -- confirm before renaming */
+};
+
+static struct tda18271_std_map hauppauge_tda18271_std_map = { /* pointed to by hauppauge_tda18271_config.std_map; only atsc_6 and qam_6 are filled in here */
+ .atsc_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3,
+ .if_lvl = 6, .rfagc_top = 0x37 },
+ .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 0, /* NOTE(review): 4000 lines up with S5H1411_IF_4000 in hcw_s5h1411_config.qam_if; units presumably kHz -- confirm */
+ .if_lvl = 6, .rfagc_top = 0x37 },
+};
+
+static struct tda18271_config hauppauge_tda18271_config = { /* handed to tda18271_attach() (with 0x60 on cx->i2c_adap[0]) in dvb_register() */
+ .std_map = &hauppauge_tda18271_std_map, /* per-standard overrides defined just above */
+ .gate = TDA18271_GATE_DIGITAL,
+ .output_opt = TDA18271_OUTPUT_LT_OFF,
+};
+
+/*
* CX18_CARD_LEADTEK_DVR3100H
*/
/* Information/confirmation of proper config values provided by Terry Wu */
@@ -244,6 +272,7 @@ static int cx18_dvb_start_feed(struct dvb_demux_feed *feed)
switch (cx->card->type) {
case CX18_CARD_HVR_1600_ESMT:
case CX18_CARD_HVR_1600_SAMSUNG:
+ case CX18_CARD_HVR_1600_S5H1411:
v = cx18_read_reg(cx, CX18_REG_DMUX_NUM_PORT_0_CONTROL);
v |= 0x00400000; /* Serial Mode */
v |= 0x00002000; /* Data Length - Byte */
@@ -455,6 +484,15 @@ static int dvb_register(struct cx18_stream *stream)
ret = 0;
}
break;
+ case CX18_CARD_HVR_1600_S5H1411:
+ dvb->fe = dvb_attach(s5h1411_attach,
+ &hcw_s5h1411_config,
+ &cx->i2c_adap[0]);
+ if (dvb->fe != NULL)
+ dvb_attach(tda18271_attach, dvb->fe,
+ 0x60, &cx->i2c_adap[0],
+ &hauppauge_tda18271_config);
+ break;
case CX18_CARD_LEADTEK_DVR3100H:
dvb->fe = dvb_attach(zl10353_attach,
&leadtek_dvr3100h_demod,
diff --git a/drivers/media/video/cx23885/cx23885-i2c.c b/drivers/media/video/cx23885/cx23885-i2c.c
index ed3d8f55029b..307ff543c254 100644
--- a/drivers/media/video/cx23885/cx23885-i2c.c
+++ b/drivers/media/video/cx23885/cx23885-i2c.c
@@ -122,10 +122,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
if (!i2c_wait_done(i2c_adap))
goto eio;
- if (!i2c_slave_did_ack(i2c_adap)) {
- retval = -ENXIO;
- goto err;
- }
if (i2c_debug) {
printk(" <W %02x %02x", msg->addr << 1, msg->buf[0]);
if (!(ctrl & I2C_NOSTOP))
@@ -158,7 +154,6 @@ static int i2c_sendbytes(struct i2c_adapter *i2c_adap,
eio:
retval = -EIO;
- err:
if (i2c_debug)
printk(KERN_ERR " ERR: %d\n", retval);
return retval;
@@ -209,10 +204,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
if (!i2c_wait_done(i2c_adap))
goto eio;
- if (cnt == 0 && !i2c_slave_did_ack(i2c_adap)) {
- retval = -ENXIO;
- goto err;
- }
msg->buf[cnt] = cx_read(bus->reg_rdata) & 0xff;
if (i2c_debug) {
dprintk(1, " %02x", msg->buf[cnt]);
@@ -224,7 +215,6 @@ static int i2c_readbytes(struct i2c_adapter *i2c_adap,
eio:
retval = -EIO;
- err:
if (i2c_debug)
printk(KERN_ERR " ERR: %d\n", retval);
return retval;
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 6fc09dd41b9d..35796e035247 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -2015,7 +2015,8 @@ static int cx25840_probe(struct i2c_client *client,
kfree(state);
return err;
}
- v4l2_ctrl_cluster(2, &state->volume);
+ if (!is_cx2583x(state))
+ v4l2_ctrl_cluster(2, &state->volume);
v4l2_ctrl_handler_setup(&state->hdl);
if (client->dev.platform_data) {
diff --git a/drivers/media/video/ivtv/ivtv-irq.c b/drivers/media/video/ivtv/ivtv-irq.c
index 9b4faf009196..9c29e964d400 100644
--- a/drivers/media/video/ivtv/ivtv-irq.c
+++ b/drivers/media/video/ivtv/ivtv-irq.c
@@ -628,22 +628,66 @@ static void ivtv_irq_enc_pio_complete(struct ivtv *itv)
static void ivtv_irq_dma_err(struct ivtv *itv)
{
u32 data[CX2341X_MBOX_MAX_DATA];
+ u32 status;
del_timer(&itv->dma_timer);
+
ivtv_api_get_data(&itv->enc_mbox, IVTV_MBOX_DMA_END, 2, data);
+ status = read_reg(IVTV_REG_DMASTATUS);
IVTV_DEBUG_WARN("DMA ERROR %08x %08x %08x %d\n", data[0], data[1],
- read_reg(IVTV_REG_DMASTATUS), itv->cur_dma_stream);
- write_reg(read_reg(IVTV_REG_DMASTATUS) & 3, IVTV_REG_DMASTATUS);
+ status, itv->cur_dma_stream);
+ /*
+ * We do *not* write back to the IVTV_REG_DMASTATUS register to
+ * clear the error status, if either the encoder write (0x02) or
+ * decoder read (0x01) bus master DMA operation do not indicate
+ * completed. We can race with the DMA engine, which may have
+ * transitioned to completed status *after* we read the register.
+ * Setting a IVTV_REG_DMASTATUS flag back to "busy" status, after the
+ * DMA engine has completed, will cause the DMA engine to stop working.
+ */
+ status &= 0x3;
+ if (status == 0x3)
+ write_reg(status, IVTV_REG_DMASTATUS);
+
if (!test_bit(IVTV_F_I_UDMA, &itv->i_flags) &&
itv->cur_dma_stream >= 0 && itv->cur_dma_stream < IVTV_MAX_STREAMS) {
struct ivtv_stream *s = &itv->streams[itv->cur_dma_stream];
- /* retry */
- if (s->type >= IVTV_DEC_STREAM_TYPE_MPG)
+ if (s->type >= IVTV_DEC_STREAM_TYPE_MPG) {
+ /* retry */
+ /*
+ * FIXME - handle cases of DMA error similar to
+ * encoder below, except conditioned on status & 0x1
+ */
ivtv_dma_dec_start(s);
- else
- ivtv_dma_enc_start(s);
- return;
+ return;
+ } else {
+ if ((status & 0x2) == 0) {
+ /*
+ * CX2341x Bus Master DMA write is ongoing.
+ * Reset the timer and let it complete.
+ */
+ itv->dma_timer.expires =
+ jiffies + msecs_to_jiffies(600);
+ add_timer(&itv->dma_timer);
+ return;
+ }
+
+ if (itv->dma_retries < 3) {
+ /*
+ * CX2341x Bus Master DMA write has ended.
+ * Retry the write, starting with the first
+ * xfer segment. Just retrying the current
+ * segment is not sufficient.
+ */
+ s->sg_processed = 0;
+ itv->dma_retries++;
+ ivtv_dma_enc_start_xfer(s);
+ return;
+ }
+ /* Too many retries, give up on this one */
+ }
+
}
if (test_bit(IVTV_F_I_UDMA, &itv->i_flags)) {
ivtv_udma_start(itv);
diff --git a/drivers/media/video/mem2mem_testdev.c b/drivers/media/video/mem2mem_testdev.c
index c179041d91f8..e7e717800ee2 100644
--- a/drivers/media/video/mem2mem_testdev.c
+++ b/drivers/media/video/mem2mem_testdev.c
@@ -1011,7 +1011,6 @@ static int m2mtest_remove(struct platform_device *pdev)
v4l2_m2m_release(dev->m2m_dev);
del_timer_sync(&dev->timer);
video_unregister_device(dev->vfd);
- video_device_release(dev->vfd);
v4l2_device_unregister(&dev->v4l2_dev);
kfree(dev);
diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c
index b63f8cafa671..561909b65ce6 100644
--- a/drivers/media/video/s2255drv.c
+++ b/drivers/media/video/s2255drv.c
@@ -57,7 +57,7 @@
#include <linux/usb.h>
#define S2255_MAJOR_VERSION 1
-#define S2255_MINOR_VERSION 20
+#define S2255_MINOR_VERSION 21
#define S2255_RELEASE 0
#define S2255_VERSION KERNEL_VERSION(S2255_MAJOR_VERSION, \
S2255_MINOR_VERSION, \
@@ -312,9 +312,9 @@ struct s2255_fh {
};
/* current cypress EEPROM firmware version */
-#define S2255_CUR_USB_FWVER ((3 << 8) | 6)
+#define S2255_CUR_USB_FWVER ((3 << 8) | 11)
/* current DSP FW version */
-#define S2255_CUR_DSP_FWVER 8
+#define S2255_CUR_DSP_FWVER 10102
/* Need DSP version 5+ for video status feature */
#define S2255_MIN_DSP_STATUS 5
#define S2255_MIN_DSP_COLORFILTER 8
@@ -492,9 +492,11 @@ static void planar422p_to_yuv_packed(const unsigned char *in,
static void s2255_reset_dsppower(struct s2255_dev *dev)
{
- s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b0b, NULL, 0, 1);
+ s2255_vendor_req(dev, 0x40, 0x0b0b, 0x0b01, NULL, 0, 1);
msleep(10);
s2255_vendor_req(dev, 0x50, 0x0000, 0x0000, NULL, 0, 1);
+ msleep(600);
+ s2255_vendor_req(dev, 0x10, 0x0000, 0x0000, NULL, 0, 1);
return;
}
diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index e9a3eab7b0cf..8c1d85e27be4 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -621,7 +621,7 @@ static int __init memstick_init(void)
{
int rc;
- workqueue = create_freezeable_workqueue("kmemstick");
+ workqueue = create_freezable_workqueue("kmemstick");
if (!workqueue)
return -ENOMEM;
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index f71f22948477..1735c84ff757 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -76,8 +76,8 @@
#define COPYRIGHT "Copyright (c) 1999-2008 " MODULEAUTHOR
#endif
-#define MPT_LINUX_VERSION_COMMON "3.04.17"
-#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.17"
+#define MPT_LINUX_VERSION_COMMON "3.04.18"
+#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.18"
#define WHAT_MAGIC_STRING "@" "(" "#" ")"
#define show_mptmod_ver(s,ver) \
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index a3856ed90aef..e8deb8ed0499 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -597,6 +597,13 @@ mptctl_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
}
static int
+mptctl_release(struct inode *inode, struct file *filep)
+{ /* .release handler wired into mptctl_fops by this patch; inode is unused */
+ fasync_helper(-1, filep, 0, &async_queue); /* on == 0: presumably drops filep from async_queue (fasync_helper's remove form) -- confirm; return value is ignored */
+ return 0;
+}
+
+static int
mptctl_fasync(int fd, struct file *filep, int mode)
{
MPT_ADAPTER *ioc;
@@ -2815,6 +2822,7 @@ static const struct file_operations mptctl_fops = {
.llseek = no_llseek,
.fasync = mptctl_fasync,
.unlocked_ioctl = mptctl_ioctl,
+ .release = mptctl_release,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_mpctl_ioctl,
#endif
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 59b8f53d1ece..0d9b82a44540 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1873,8 +1873,9 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
}
out:
- printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n",
- ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), SCpnt);
+ printk(MYIOC_s_INFO_FMT "task abort: %s (rv=%04x) (sc=%p) (sn=%ld)\n",
+ ioc->name, ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), retval,
+ SCpnt, SCpnt->serial_number);
return retval;
}
@@ -1911,7 +1912,7 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
vdevice = SCpnt->device->hostdata;
if (!vdevice || !vdevice->vtarget) {
- retval = SUCCESS;
+ retval = 0;
goto out;
}
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index a0421efe04ca..8a5b2d8f4daf 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -84,7 +84,8 @@ int i2o_driver_register(struct i2o_driver *drv)
osm_debug("Register driver %s\n", drv->name);
if (drv->event) {
- drv->event_queue = create_workqueue(drv->name);
+ drv->event_queue = alloc_workqueue(drv->name,
+ WQ_MEM_RECLAIM, 1);
if (!drv->event_queue) {
osm_err("Could not initialize event queue for driver "
"%s\n", drv->name);
diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c
index 6a1f94042612..c45e6305b26f 100644
--- a/drivers/mfd/asic3.c
+++ b/drivers/mfd/asic3.c
@@ -143,9 +143,9 @@ static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc)
unsigned long flags;
struct asic3 *asic;
- desc->chip->ack(irq);
+ desc->irq_data.chip->irq_ack(&desc->irq_data);
- asic = desc->handler_data;
+ asic = get_irq_data(irq);
for (iter = 0 ; iter < MAX_ASIC_ISR_LOOPS; iter++) {
u32 status;
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
index 33c923d215c7..fdd8a1b8bc67 100644
--- a/drivers/mfd/davinci_voicecodec.c
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -118,12 +118,12 @@ static int __init davinci_vc_probe(struct platform_device *pdev)
/* Voice codec interface client */
cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL];
- cell->name = "davinci_vcif";
+ cell->name = "davinci-vcif";
cell->driver_data = davinci_vc;
/* Voice codec CQ93VC client */
cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL];
- cell->name = "cq93vc";
+ cell->name = "cq93vc-codec";
cell->driver_data = davinci_vc;
ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells,
diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index 627cf577b16d..e9018d1394ee 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -150,12 +150,12 @@ static inline int __tps6586x_write(struct i2c_client *client,
static inline int __tps6586x_writes(struct i2c_client *client, int reg,
int len, uint8_t *val)
{
- int ret;
+ int ret, i;
- ret = i2c_smbus_write_i2c_block_data(client, reg, len, val);
- if (ret < 0) {
- dev_err(&client->dev, "failed writings to 0x%02x\n", reg);
- return ret;
+ for (i = 0; i < len; i++) {
+ ret = __tps6586x_write(client, reg + i, *(val + i));
+ if (ret < 0)
+ return ret;
}
return 0;
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 000cb414a78a..92b85e28a15e 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -385,12 +385,18 @@ static int ucb1x00_ts_add(struct ucb1x00_dev *dev)
idev->close = ucb1x00_ts_close;
__set_bit(EV_ABS, idev->evbit);
- __set_bit(ABS_X, idev->absbit);
- __set_bit(ABS_Y, idev->absbit);
- __set_bit(ABS_PRESSURE, idev->absbit);
input_set_drvdata(idev, ts);
+ ucb1x00_adc_enable(ts->ucb);
+ ts->x_res = ucb1x00_ts_read_xres(ts);
+ ts->y_res = ucb1x00_ts_read_yres(ts);
+ ucb1x00_adc_disable(ts->ucb);
+
+ input_set_abs_params(idev, ABS_X, 0, ts->x_res, 0, 0);
+ input_set_abs_params(idev, ABS_Y, 0, ts->y_res, 0, 0);
+ input_set_abs_params(idev, ABS_PRESSURE, 0, 0, 0, 0);
+
err = input_register_device(idev);
if (err)
goto fail;
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 41233c7fa581..f4016a075fd6 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -246,6 +246,16 @@ static int wm8994_suspend(struct device *dev)
struct wm8994 *wm8994 = dev_get_drvdata(dev);
int ret;
+ /* Don't actually go through with the suspend if the CODEC is
+ * still active (e.g. for audio passthrough from CP). */
+ ret = wm8994_reg_read(wm8994, WM8994_POWER_MANAGEMENT_1);
+ if (ret < 0) {
+ dev_err(dev, "Failed to read power status: %d\n", ret);
+ } else if (ret & WM8994_VMID_SEL_MASK) {
+ dev_dbg(dev, "CODEC still active, ignoring suspend\n");
+ return 0;
+ }
+
/* GPIO configuration state is saved here since we may be configuring
* the GPIO alternate functions even if we're not using the gpiolib
* driver for them.
@@ -261,6 +271,8 @@ static int wm8994_suspend(struct device *dev)
if (ret < 0)
dev_err(dev, "Failed to save LDO registers: %d\n", ret);
+ wm8994->suspended = true;
+
ret = regulator_bulk_disable(wm8994->num_supplies,
wm8994->supplies);
if (ret != 0) {
@@ -276,6 +288,10 @@ static int wm8994_resume(struct device *dev)
struct wm8994 *wm8994 = dev_get_drvdata(dev);
int ret;
+ /* We may have lied to the PM core about suspending */
+ if (!wm8994->suspended)
+ return 0;
+
ret = regulator_bulk_enable(wm8994->num_supplies,
wm8994->supplies);
if (ret != 0) {
@@ -298,6 +314,8 @@ static int wm8994_resume(struct device *dev)
if (ret < 0)
dev_err(dev, "Failed to restore GPIO registers: %d\n", ret);
+ wm8994->suspended = false;
+
return 0;
}
#endif
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
index 63ee4c1a5315..b6e1c9a6679e 100644
--- a/drivers/misc/bmp085.c
+++ b/drivers/misc/bmp085.c
@@ -449,6 +449,7 @@ static const struct i2c_device_id bmp085_id[] = {
{ "bmp085", 0 },
{ }
};
+MODULE_DEVICE_TABLE(i2c, bmp085_id);
static struct i2c_driver bmp085_driver = {
.driver = {
diff --git a/drivers/misc/iwmc3200top/iwmc3200top.h b/drivers/misc/iwmc3200top/iwmc3200top.h
index 740ff0738ea8..620973ed8bf9 100644
--- a/drivers/misc/iwmc3200top/iwmc3200top.h
+++ b/drivers/misc/iwmc3200top/iwmc3200top.h
@@ -183,9 +183,7 @@ struct iwmct_priv {
u32 barker;
struct iwmct_dbg dbg;
- /* drivers work queue */
- struct workqueue_struct *wq;
- struct workqueue_struct *bus_rescan_wq;
+ /* drivers work items */
struct work_struct bus_rescan_worker;
struct work_struct isr_worker;
diff --git a/drivers/misc/iwmc3200top/main.c b/drivers/misc/iwmc3200top/main.c
index c73cef2c3c5e..727af07f1fbd 100644
--- a/drivers/misc/iwmc3200top/main.c
+++ b/drivers/misc/iwmc3200top/main.c
@@ -89,7 +89,7 @@ static void op_top_message(struct iwmct_priv *priv, struct top_msg *msg)
switch (msg->hdr.opcode) {
case OP_OPR_ALIVE:
LOG_INFO(priv, FW_MSG, "Got ALIVE from device, wake rescan\n");
- queue_work(priv->bus_rescan_wq, &priv->bus_rescan_worker);
+ schedule_work(&priv->bus_rescan_worker);
break;
default:
LOG_INFO(priv, FW_MSG, "Received msg opcode 0x%X\n",
@@ -360,7 +360,7 @@ static void iwmct_irq(struct sdio_func *func)
/* clear the function's interrupt request bit (write 1 to clear) */
sdio_writeb(func, 1, IWMC_SDIO_INTR_CLEAR_ADDR, &ret);
- queue_work(priv->wq, &priv->isr_worker);
+ schedule_work(&priv->isr_worker);
LOG_TRACE(priv, IRQ, "exit iwmct_irq\n");
@@ -506,10 +506,6 @@ static int iwmct_probe(struct sdio_func *func,
priv->func = func;
sdio_set_drvdata(func, priv);
-
- /* create drivers work queue */
- priv->wq = create_workqueue(DRV_NAME "_wq");
- priv->bus_rescan_wq = create_workqueue(DRV_NAME "_rescan_wq");
INIT_WORK(&priv->bus_rescan_worker, iwmct_rescan_worker);
INIT_WORK(&priv->isr_worker, iwmct_irq_read_worker);
@@ -604,9 +600,9 @@ static void iwmct_remove(struct sdio_func *func)
sdio_release_irq(func);
sdio_release_host(func);
- /* Safely destroy osc workqueue */
- destroy_workqueue(priv->bus_rescan_wq);
- destroy_workqueue(priv->wq);
+ /* Make sure works are finished */
+ flush_work_sync(&priv->bus_rescan_worker);
+ flush_work_sync(&priv->isr_worker);
sdio_claim_host(func);
sdio_disable_func(func);
diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c
index 5f6852dff40b..44d4475a09dd 100644
--- a/drivers/misc/tifm_core.c
+++ b/drivers/misc/tifm_core.c
@@ -329,7 +329,7 @@ static int __init tifm_init(void)
{
int rc;
- workqueue = create_freezeable_workqueue("tifm");
+ workqueue = create_freezable_workqueue("tifm");
if (!workqueue)
return -ENOMEM;
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 4d2ea8e80140..6df5a55da110 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -785,7 +785,7 @@ static int __init vmballoon_init(void)
if (x86_hyper != &x86_hyper_vmware)
return -ENODEV;
- vmballoon_wq = create_freezeable_workqueue("vmmemctl");
+ vmballoon_wq = create_freezable_workqueue("vmmemctl");
if (!vmballoon_wq) {
pr_err("failed to create workqueue\n");
return -ENOMEM;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 6625c057be05..150b5f3cd401 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1529,7 +1529,7 @@ void mmc_rescan(struct work_struct *work)
* still present
*/
if (host->bus_ops && host->bus_ops->detect && !host->bus_dead
- && mmc_card_is_removable(host))
+ && !(host->caps & MMC_CAP_NONREMOVABLE))
host->bus_ops->detect(host);
/*
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 5c4a54d9b6a4..ebc62ad4cc56 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -792,7 +792,6 @@ int mmc_attach_sdio(struct mmc_host *host)
*/
mmc_release_host(host);
err = mmc_add_card(host->card);
- mmc_claim_host(host);
if (err)
goto remove_added;
@@ -805,12 +804,12 @@ int mmc_attach_sdio(struct mmc_host *host)
goto remove_added;
}
+ mmc_claim_host(host);
return 0;
remove_added:
/* Remove without lock if the device has been added. */
- mmc_release_host(host);
mmc_sdio_remove(host);
mmc_claim_host(host);
remove:
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index fd877f633dd2..2f7fc0c5146f 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1516,21 +1516,17 @@ static int __devexit mmc_spi_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_OF)
static struct of_device_id mmc_spi_of_match_table[] __devinitdata = {
{ .compatible = "mmc-spi-slot", },
{},
};
-#endif
static struct spi_driver mmc_spi_driver = {
.driver = {
.name = "mmc_spi",
.bus = &spi_bus_type,
.owner = THIS_MODULE,
-#if defined(CONFIG_OF)
.of_match_table = mmc_spi_of_match_table,
-#endif
},
.probe = mmc_spi_probe,
.remove = __devexit_p(mmc_spi_remove),
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index a8c3e1c9b02a..4aaa88f8ab5f 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1230,10 +1230,32 @@ static int inval_cache_and_wait_for_operation(
sleep_time = chip_op_time / 2;
for (;;) {
+ if (chip->state != chip_state) {
+ /* Someone's suspended the operation: sleep */
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ add_wait_queue(&chip->wq, &wait);
+ mutex_unlock(&chip->mutex);
+ schedule();
+ remove_wait_queue(&chip->wq, &wait);
+ mutex_lock(&chip->mutex);
+ continue;
+ }
+
status = map_read(map, cmd_adr);
if (map_word_andequal(map, status, status_OK, status_OK))
break;
+ if (chip->erase_suspended && chip_state == FL_ERASING) {
+ /* Erase suspend occurred while sleeping: reset timeout */
+ timeo = reset_timeo;
+ chip->erase_suspended = 0;
+ }
+ if (chip->write_suspended && chip_state == FL_WRITING) {
+ /* Write suspend occurred while sleeping: reset timeout */
+ timeo = reset_timeo;
+ chip->write_suspended = 0;
+ }
if (!timeo) {
map_write(map, CMD(0x70), cmd_adr);
chip->state = FL_STATUS;
@@ -1257,27 +1279,6 @@ static int inval_cache_and_wait_for_operation(
timeo--;
}
mutex_lock(&chip->mutex);
-
- while (chip->state != chip_state) {
- /* Someone's suspended the operation: sleep */
- DECLARE_WAITQUEUE(wait, current);
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&chip->wq, &wait);
- mutex_unlock(&chip->mutex);
- schedule();
- remove_wait_queue(&chip->wq, &wait);
- mutex_lock(&chip->mutex);
- }
- if (chip->erase_suspended && chip_state == FL_ERASING) {
- /* Erase suspend occured while sleep: reset timeout */
- timeo = reset_timeo;
- chip->erase_suspended = 0;
- }
- if (chip->write_suspended && chip_state == FL_WRITING) {
- /* Write suspend occured while sleep: reset timeout */
- timeo = reset_timeo;
- chip->write_suspended = 0;
- }
}
/* Done and happy. */
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index d72a5fb2d041..4e1be51cc122 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -1935,14 +1935,14 @@ static void jedec_reset(u32 base, struct map_info *map, struct cfi_private *cfi)
}
-static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
+static int cfi_jedec_setup(struct map_info *map, struct cfi_private *cfi, int index)
{
int i,num_erase_regions;
uint8_t uaddr;
- if (! (jedec_table[index].devtypes & p_cfi->device_type)) {
+ if (!(jedec_table[index].devtypes & cfi->device_type)) {
DEBUG(MTD_DEBUG_LEVEL1, "Rejecting potential %s with incompatible %d-bit device type\n",
- jedec_table[index].name, 4 * (1<<p_cfi->device_type));
+ jedec_table[index].name, 4 * (1<<cfi->device_type));
return 0;
}
@@ -1950,27 +1950,28 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
num_erase_regions = jedec_table[index].nr_regions;
- p_cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
- if (!p_cfi->cfiq) {
+ cfi->cfiq = kmalloc(sizeof(struct cfi_ident) + num_erase_regions * 4, GFP_KERNEL);
+ if (!cfi->cfiq) {
//xx printk(KERN_WARNING "%s: kmalloc failed for CFI ident structure\n", map->name);
return 0;
}
- memset(p_cfi->cfiq,0,sizeof(struct cfi_ident));
+ memset(cfi->cfiq, 0, sizeof(struct cfi_ident));
- p_cfi->cfiq->P_ID = jedec_table[index].cmd_set;
- p_cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
- p_cfi->cfiq->DevSize = jedec_table[index].dev_size;
- p_cfi->cfi_mode = CFI_MODE_JEDEC;
+ cfi->cfiq->P_ID = jedec_table[index].cmd_set;
+ cfi->cfiq->NumEraseRegions = jedec_table[index].nr_regions;
+ cfi->cfiq->DevSize = jedec_table[index].dev_size;
+ cfi->cfi_mode = CFI_MODE_JEDEC;
+ cfi->sector_erase_cmd = CMD(0x30);
for (i=0; i<num_erase_regions; i++){
- p_cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
+ cfi->cfiq->EraseRegionInfo[i] = jedec_table[index].regions[i];
}
- p_cfi->cmdset_priv = NULL;
+ cfi->cmdset_priv = NULL;
/* This may be redundant for some cases, but it doesn't hurt */
- p_cfi->mfr = jedec_table[index].mfr_id;
- p_cfi->id = jedec_table[index].dev_id;
+ cfi->mfr = jedec_table[index].mfr_id;
+ cfi->id = jedec_table[index].dev_id;
uaddr = jedec_table[index].uaddr;
@@ -1978,8 +1979,8 @@ static int cfi_jedec_setup(struct cfi_private *p_cfi, int index)
our brains explode when we see the datasheets talking about address
lines numbered from A-1 to A18. The CFI table has unlock addresses
in device-words according to the mode the device is connected in */
- p_cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / p_cfi->device_type;
- p_cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / p_cfi->device_type;
+ cfi->addr_unlock1 = unlock_addrs[uaddr].addr1 / cfi->device_type;
+ cfi->addr_unlock2 = unlock_addrs[uaddr].addr2 / cfi->device_type;
return 1; /* ok */
}
@@ -2175,7 +2176,7 @@ static int jedec_probe_chip(struct map_info *map, __u32 base,
"MTD %s(): matched device 0x%x,0x%x unlock_addrs: 0x%.4x 0x%.4x\n",
__func__, cfi->mfr, cfi->id,
cfi->addr_unlock1, cfi->addr_unlock2 );
- if (!cfi_jedec_setup(cfi, i))
+ if (!cfi_jedec_setup(map, cfi, i))
return 0;
goto ok_out;
}
diff --git a/drivers/mtd/maps/amd76xrom.c b/drivers/mtd/maps/amd76xrom.c
index 77d64ce19e9f..92de7e3a49a5 100644
--- a/drivers/mtd/maps/amd76xrom.c
+++ b/drivers/mtd/maps/amd76xrom.c
@@ -151,6 +151,7 @@ static int __devinit amd76xrom_init_one (struct pci_dev *pdev,
printk(KERN_ERR MOD_NAME
" %s(): Unable to register resource %pR - kernel bug?\n",
__func__, &window->rsrc);
+ return -EBUSY;
}
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index cb20c67995d8..e0a2373bf0e2 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -413,7 +413,6 @@ error3:
error2:
list_del(&new->list);
error1:
- kfree(new);
return ret;
}
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 15682ec8530e..28af71c61834 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -968,6 +968,6 @@ static void __exit omap_nand_exit(void)
module_init(omap_nand_init);
module_exit(omap_nand_exit);
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Glue layer for NAND flash on TI OMAP boards");
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index d9d7efbc77cc..6322d1fb5d62 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -930,7 +930,7 @@ int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
init_completion(&dev->dma_done);
- dev->card_workqueue = create_freezeable_workqueue(DRV_NAME);
+ dev->card_workqueue = create_freezable_workqueue(DRV_NAME);
if (!dev->card_workqueue)
goto error9;
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index e78914938c5c..ac08750748a3 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -131,7 +131,7 @@ static struct platform_driver generic_onenand_driver = {
.remove = __devexit_p(generic_onenand_remove),
};
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
static int __init generic_onenand_init(void)
{
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index ac31f461cc1c..c849cacf4b2f 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -860,7 +860,7 @@ static void __exit omap2_onenand_exit(void)
module_init(omap2_onenand_init);
module_exit(omap2_onenand_exit);
-MODULE_ALIAS(DRIVER_NAME);
+MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>");
MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3");
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index 67822cf6c025..ac0d6a8613b5 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -1258,7 +1258,7 @@ static struct mtd_blktrans_ops sm_ftl_ops = {
static __init int sm_module_init(void)
{
int error = 0;
- cache_flush_workqueue = create_freezeable_workqueue("smflush");
+ cache_flush_workqueue = create_freezable_workqueue("smflush");
if (IS_ERR(cache_flush_workqueue))
return PTR_ERR(cache_flush_workqueue);
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 39214e512452..7ca0eded2561 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -425,11 +425,6 @@ static irqreturn_t ariadne_interrupt(int irq, void *data)
int csr0, boguscnt;
int handled = 0;
- if (dev == NULL) {
- printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n");
- return IRQ_NONE;
- }
-
lance->RAP = CSR0; /* PCnet-ISA Controller Status */
if (!(lance->RDP & INTR)) /* Check if any interrupt has been */
diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h
index 653c62475cb6..8849699c66c4 100644
--- a/drivers/net/bnx2x/bnx2x.h
+++ b/drivers/net/bnx2x/bnx2x.h
@@ -22,7 +22,7 @@
* (you will need to reboot afterwards) */
/* #define BNX2X_STOP_ON_ERROR */
-#define DRV_MODULE_VERSION "1.62.00-5"
+#define DRV_MODULE_VERSION "1.62.00-6"
#define DRV_MODULE_RELDATE "2011/01/30"
#define BNX2X_BC_VER 0x040200
@@ -1211,6 +1211,7 @@ struct bnx2x {
/* DCBX Negotation results */
struct dcbx_features dcbx_local_feat;
u32 dcbx_error;
+ u32 pending_max;
};
/**
@@ -1613,19 +1614,23 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms,
#define BNX2X_BTR 4
#define MAX_SPQ_PENDING 8
-
-/* CMNG constants
- derived from lab experiments, and not from system spec calculations !!! */
-#define DEF_MIN_RATE 100
-/* resolution of the rate shaping timer - 100 usec */
-#define RS_PERIODIC_TIMEOUT_USEC 100
-/* resolution of fairness algorithm in usecs -
- coefficient for calculating the actual t fair */
-#define T_FAIR_COEF 10000000
+/* CMNG constants, as derived from system spec calculations */
+/* default MIN rate in case VNIC min rate is configured to zero - 100Mbps */
+#define DEF_MIN_RATE 100
+/* resolution of the rate shaping timer - 400 usec */
+#define RS_PERIODIC_TIMEOUT_USEC 400
/* number of bytes in single QM arbitration cycle -
- coefficient for calculating the fairness timer */
-#define QM_ARB_BYTES 40000
-#define FAIR_MEM 2
+ * coefficient for calculating the fairness timer */
+#define QM_ARB_BYTES 160000
+/* resolution of Min algorithm 1:100 */
+#define MIN_RES 100
+/* how many bytes above threshold for the minimal credit of Min algorithm*/
+#define MIN_ABOVE_THRESH 32768
+/* Fairness algorithm integration time coefficient -
+ * for calculating the actual Tfair */
+#define T_FAIR_COEF ((MIN_ABOVE_THRESH + QM_ARB_BYTES) * 8 * MIN_RES)
+/* Memory of fairness algorithm . 2 cycles */
+#define FAIR_MEM 2
#define ATTN_NIG_FOR_FUNC (1L << 8)
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 710ce5d04c53..a71b32940533 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -259,10 +259,44 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
#endif
}
+/* Timestamp option length allowed for TPA aggregation:
+ *
+ * nop nop kind length echo val
+ */
+#define TPA_TSTAMP_OPT_LEN 12
+/**
+ * Calculate the approximate value of the MSS for this
+ * aggregation using the first packet of it.
+ *
+ * @param bp
+ * @param parsing_flags Parsing flags from the START CQE
+ * @param len_on_bd Total length of the first packet for the
+ * aggregation.
+ */
+static inline u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
+ u16 len_on_bd)
+{
+ /* A TPA aggregation won't have any IP options, nor any TCP options
+ * other than timestamp.
+ */
+ u16 hdrs_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct tcphdr);
+
+
+ /* Check if there was a TCP timestamp; if there is, it will
+ * always be 12 bytes long: nop nop kind length echo val.
+ *
+ * Otherwise FW would close the aggregation.
+ */
+ if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
+ hdrs_len += TPA_TSTAMP_OPT_LEN;
+
+ return len_on_bd - hdrs_len;
+}
+
static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
struct sk_buff *skb,
struct eth_fast_path_rx_cqe *fp_cqe,
- u16 cqe_idx)
+ u16 cqe_idx, u16 parsing_flags)
{
struct sw_rx_page *rx_pg, old_rx_pg;
u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd);
@@ -275,8 +309,8 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
/* This is needed in order to enable forwarding support */
if (frag_size)
- skb_shinfo(skb)->gso_size = min((u32)SGE_PAGE_SIZE,
- max(frag_size, (u32)len_on_bd));
+ skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp, parsing_flags,
+ len_on_bd);
#ifdef BNX2X_STOP_ON_ERROR
if (pages > min_t(u32, 8, MAX_SKB_FRAGS)*SGE_PAGE_SIZE*PAGES_PER_SGE) {
@@ -344,6 +378,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
if (likely(new_skb)) {
/* fix ip xsum and give it to the stack */
/* (no need to map the new skb) */
+ u16 parsing_flags =
+ le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags);
prefetch(skb);
prefetch(((char *)(skb)) + L1_CACHE_BYTES);
@@ -373,9 +409,9 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
}
if (!bnx2x_fill_frag_skb(bp, fp, skb,
- &cqe->fast_path_cqe, cqe_idx)) {
- if ((le16_to_cpu(cqe->fast_path_cqe.
- pars_flags.flags) & PARSING_FLAGS_VLAN))
+ &cqe->fast_path_cqe, cqe_idx,
+ parsing_flags)) {
+ if (parsing_flags & PARSING_FLAGS_VLAN)
__vlan_hwaccel_put_tag(skb,
le16_to_cpu(cqe->fast_path_cqe.
vlan_tag));
@@ -703,19 +739,20 @@ u16 bnx2x_get_mf_speed(struct bnx2x *bp)
{
u16 line_speed = bp->link_vars.line_speed;
if (IS_MF(bp)) {
- u16 maxCfg = (bp->mf_config[BP_VN(bp)] &
- FUNC_MF_CFG_MAX_BW_MASK) >>
- FUNC_MF_CFG_MAX_BW_SHIFT;
- /* Calculate the current MAX line speed limit for the DCC
- * capable devices
+ u16 maxCfg = bnx2x_extract_max_cfg(bp,
+ bp->mf_config[BP_VN(bp)]);
+
+ /* Calculate the current MAX line speed limit for the MF
+ * devices
*/
- if (IS_MF_SD(bp)) {
+ if (IS_MF_SI(bp))
+ line_speed = (line_speed * maxCfg) / 100;
+ else { /* SD mode */
u16 vn_max_rate = maxCfg * 100;
if (vn_max_rate < line_speed)
line_speed = vn_max_rate;
- } else /* IS_MF_SI(bp)) */
- line_speed = (line_speed * maxCfg) / 100;
+ }
}
return line_speed;
@@ -959,6 +996,23 @@ void bnx2x_free_skbs(struct bnx2x *bp)
bnx2x_free_rx_skbs(bp);
}
+void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value)
+{
+ /* load old values */
+ u32 mf_cfg = bp->mf_config[BP_VN(bp)];
+
+ if (value != bnx2x_extract_max_cfg(bp, mf_cfg)) {
+ /* leave all but MAX value */
+ mf_cfg &= ~FUNC_MF_CFG_MAX_BW_MASK;
+
+ /* set new MAX value */
+ mf_cfg |= (value << FUNC_MF_CFG_MAX_BW_SHIFT)
+ & FUNC_MF_CFG_MAX_BW_MASK;
+
+ bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, mf_cfg);
+ }
+}
+
static void bnx2x_free_msix_irqs(struct bnx2x *bp)
{
int i, offset = 1;
@@ -1427,6 +1481,11 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
bnx2x_set_eth_mac(bp, 1);
+ if (bp->pending_max) {
+ bnx2x_update_max_mf_config(bp, bp->pending_max);
+ bp->pending_max = 0;
+ }
+
if (bp->port.pmf)
bnx2x_initial_phy_init(bp, load_mode);
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index 03eb4d68e6bb..85ea7f26b51f 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -341,6 +341,15 @@ void bnx2x_dcbx_init(struct bnx2x *bp);
*/
int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state);
+/**
+ * Updates MAX part of MF configuration in HW
+ * (if required)
+ *
+ * @param bp
+ * @param value
+ */
+void bnx2x_update_max_mf_config(struct bnx2x *bp, u32 value);
+
/* dev_close main block */
int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode);
@@ -1044,4 +1053,24 @@ static inline void storm_memset_cmng(struct bnx2x *bp,
void bnx2x_acquire_phy_lock(struct bnx2x *bp);
void bnx2x_release_phy_lock(struct bnx2x *bp);
+/**
+ * Extracts MAX BW part from MF configuration.
+ *
+ * @param bp
+ * @param mf_cfg
+ *
+ * @return u16
+ */
+static inline u16 bnx2x_extract_max_cfg(struct bnx2x *bp, u32 mf_cfg)
+{
+ u16 max_cfg = (mf_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
+ FUNC_MF_CFG_MAX_BW_SHIFT;
+ if (!max_cfg) {
+ BNX2X_ERR("Illegal configuration detected for Max BW - "
+ "using 100 instead\n");
+ max_cfg = 100;
+ }
+ return max_cfg;
+}
+
#endif /* BNX2X_CMN_H */
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 5b44a8b48509..7e92f9d0dcfd 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -238,7 +238,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
speed |= (cmd->speed_hi << 16);
if (IS_MF_SI(bp)) {
- u32 param = 0;
+ u32 part;
u32 line_speed = bp->link_vars.line_speed;
/* use 10G if no link detected */
@@ -251,23 +251,22 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
REQ_BC_VER_4_SET_MF_BW);
return -EINVAL;
}
- if (line_speed < speed) {
- BNX2X_DEV_INFO("New speed should be less or equal "
- "to actual line speed\n");
+
+ part = (speed * 100) / line_speed;
+
+ if (line_speed < speed || !part) {
+ BNX2X_DEV_INFO("Speed setting should be in a range "
+ "from 1%% to 100%% "
+ "of actual line speed\n");
return -EINVAL;
}
- /* load old values */
- param = bp->mf_config[BP_VN(bp)];
-
- /* leave only MIN value */
- param &= FUNC_MF_CFG_MIN_BW_MASK;
- /* set new MAX value */
- param |= (((speed * 100) / line_speed)
- << FUNC_MF_CFG_MAX_BW_SHIFT)
- & FUNC_MF_CFG_MAX_BW_MASK;
+ if (bp->state != BNX2X_STATE_OPEN)
+ /* store value for following "load" */
+ bp->pending_max = part;
+ else
+ bnx2x_update_max_mf_config(bp, part);
- bnx2x_fw_command(bp, DRV_MSG_CODE_SET_MF_BW, param);
return 0;
}
@@ -1781,9 +1780,7 @@ static int bnx2x_test_nvram(struct bnx2x *bp)
{ 0x100, 0x350 }, /* manuf_info */
{ 0x450, 0xf0 }, /* feature_info */
{ 0x640, 0x64 }, /* upgrade_key_info */
- { 0x6a4, 0x64 },
{ 0x708, 0x70 }, /* manuf_key_info */
- { 0x778, 0x70 },
{ 0, 0 }
};
__be32 buf[0x350 / 4];
@@ -1933,11 +1930,11 @@ static void bnx2x_self_test(struct net_device *dev,
buf[4] = 1;
etest->flags |= ETH_TEST_FL_FAILED;
}
- if (bp->port.pmf)
- if (bnx2x_link_test(bp, is_serdes) != 0) {
- buf[5] = 1;
- etest->flags |= ETH_TEST_FL_FAILED;
- }
+
+ if (bnx2x_link_test(bp, is_serdes) != 0) {
+ buf[5] = 1;
+ etest->flags |= ETH_TEST_FL_FAILED;
+ }
#ifdef BNX2X_EXTRA_DEBUG
bnx2x_panic_dump(bp);
diff --git a/drivers/net/bnx2x/bnx2x_init.h b/drivers/net/bnx2x/bnx2x_init.h
index 5a268e9a0895..fa6dbe3f2058 100644
--- a/drivers/net/bnx2x/bnx2x_init.h
+++ b/drivers/net/bnx2x/bnx2x_init.h
@@ -241,7 +241,7 @@ static const struct {
/* Block IGU, MISC, PXP and PXP2 parity errors as long as we don't
* want to handle "system kill" flow at the moment.
*/
- BLOCK_PRTY_INFO(PXP, 0x3ffffff, 0x3ffffff, 0x3ffffff, 0x3ffffff),
+ BLOCK_PRTY_INFO(PXP, 0x7ffffff, 0x3ffffff, 0x3ffffff, 0x7ffffff),
BLOCK_PRTY_INFO_0(PXP2, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
BLOCK_PRTY_INFO_1(PXP2, 0x7ff, 0x7f, 0x7f, 0x7ff),
BLOCK_PRTY_INFO(HC, 0x7, 0x7, 0x7, 0),
diff --git a/drivers/net/bnx2x/bnx2x_main.c b/drivers/net/bnx2x/bnx2x_main.c
index d584d32c747d..aa032339e321 100644
--- a/drivers/net/bnx2x/bnx2x_main.c
+++ b/drivers/net/bnx2x/bnx2x_main.c
@@ -1974,13 +1974,22 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
vn_max_rate = 0;
} else {
+ u32 maxCfg = bnx2x_extract_max_cfg(bp, vn_cfg);
+
vn_min_rate = ((vn_cfg & FUNC_MF_CFG_MIN_BW_MASK) >>
FUNC_MF_CFG_MIN_BW_SHIFT) * 100;
- /* If min rate is zero - set it to 1 */
+ /* If fairness is enabled (not all min rates are zeroes) and
+ if current min rate is zero - set it to 1.
+ This is a requirement of the algorithm. */
if (bp->vn_weight_sum && (vn_min_rate == 0))
vn_min_rate = DEF_MIN_RATE;
- vn_max_rate = ((vn_cfg & FUNC_MF_CFG_MAX_BW_MASK) >>
- FUNC_MF_CFG_MAX_BW_SHIFT) * 100;
+
+ if (IS_MF_SI(bp))
+ /* maxCfg in percents of linkspeed */
+ vn_max_rate = (bp->link_vars.line_speed * maxCfg) / 100;
+ else
+ /* maxCfg is absolute in 100Mb units */
+ vn_max_rate = maxCfg * 100;
}
DP(NETIF_MSG_IFUP,
@@ -2006,7 +2015,8 @@ static void bnx2x_init_vn_minmax(struct bnx2x *bp, int vn)
m_fair_vn.vn_credit_delta =
max_t(u32, (vn_min_rate * (T_FAIR_COEF /
(8 * bp->vn_weight_sum))),
- (bp->cmng.fair_vars.fair_threshold * 2));
+ (bp->cmng.fair_vars.fair_threshold +
+ MIN_ABOVE_THRESH));
DP(NETIF_MSG_IFUP, "m_fair_vn.vn_credit_delta %d\n",
m_fair_vn.vn_credit_delta);
}
@@ -2082,8 +2092,9 @@ static void bnx2x_cmng_fns_init(struct bnx2x *bp, u8 read_cfg, u8 cmng_type)
bnx2x_calc_vn_weight_sum(bp);
/* calculate and set min-max rate for each vn */
- for (vn = VN_0; vn < E1HVN_MAX; vn++)
- bnx2x_init_vn_minmax(bp, vn);
+ if (bp->port.pmf)
+ for (vn = VN_0; vn < E1HVN_MAX; vn++)
+ bnx2x_init_vn_minmax(bp, vn);
/* always enable rate shaping and fairness */
bp->cmng.flags.cmng_enables |=
@@ -2152,13 +2163,6 @@ static void bnx2x_link_attn(struct bnx2x *bp)
bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
}
- /* indicate link status only if link status actually changed */
- if (prev_link_status != bp->link_vars.link_status)
- bnx2x_link_report(bp);
-
- if (IS_MF(bp))
- bnx2x_link_sync_notify(bp);
-
if (bp->link_vars.link_up && bp->link_vars.line_speed) {
int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
@@ -2170,6 +2174,13 @@ static void bnx2x_link_attn(struct bnx2x *bp)
DP(NETIF_MSG_IFUP,
"single function mode without fairness\n");
}
+
+ if (IS_MF(bp))
+ bnx2x_link_sync_notify(bp);
+
+ /* indicate link status only if link status actually changed */
+ if (prev_link_status != bp->link_vars.link_status)
+ bnx2x_link_report(bp);
}
void bnx2x__link_status_update(struct bnx2x *bp)
diff --git a/drivers/net/bnx2x/bnx2x_stats.c b/drivers/net/bnx2x/bnx2x_stats.c
index bda60d590fa8..3445ded6674f 100644
--- a/drivers/net/bnx2x/bnx2x_stats.c
+++ b/drivers/net/bnx2x/bnx2x_stats.c
@@ -1239,14 +1239,14 @@ void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
if (unlikely(bp->panic))
return;
+ bnx2x_stats_stm[bp->stats_state][event].action(bp);
+
/* Protect a state change flow */
spin_lock_bh(&bp->stats_lock);
state = bp->stats_state;
bp->stats_state = bnx2x_stats_stm[state][event].next_state;
spin_unlock_bh(&bp->stats_lock);
- bnx2x_stats_stm[state][event].action(bp);
-
if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
state, event, bp->stats_state);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 1024ae158227..a5d5d0b5b155 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -281,23 +281,23 @@ static inline int __check_agg_selection_timer(struct port *port)
}
/**
- * __get_rx_machine_lock - lock the port's RX machine
+ * __get_state_machine_lock - lock the port's state machines
* @port: the port we're looking at
*
*/
-static inline void __get_rx_machine_lock(struct port *port)
+static inline void __get_state_machine_lock(struct port *port)
{
- spin_lock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_lock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
/**
- * __release_rx_machine_lock - unlock the port's RX machine
+ * __release_state_machine_lock - unlock the port's state machines
* @port: the port we're looking at
*
*/
-static inline void __release_rx_machine_lock(struct port *port)
+static inline void __release_state_machine_lock(struct port *port)
{
- spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_unlock_bh(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
/**
@@ -388,14 +388,14 @@ static u8 __get_duplex(struct port *port)
}
/**
- * __initialize_port_locks - initialize a port's RX machine spinlock
+ * __initialize_port_locks - initialize a port's STATE machine spinlock
* @port: the port we're looking at
*
*/
static inline void __initialize_port_locks(struct port *port)
{
// make sure it isn't called twice
- spin_lock_init(&(SLAVE_AD_INFO(port->slave).rx_machine_lock));
+ spin_lock_init(&(SLAVE_AD_INFO(port->slave).state_machine_lock));
}
//conversions
@@ -1025,9 +1025,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
{
rx_states_t last_state;
- // Lock to prevent 2 instances of this function to run simultaneously(rx interrupt and periodic machine callback)
- __get_rx_machine_lock(port);
-
// keep current State Machine state to compare later if it was changed
last_state = port->sm_rx_state;
@@ -1133,7 +1130,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
pr_err("%s: An illegal loopback occurred on adapter (%s).\n"
"Check the configuration to verify that all adapters are connected to 802.3ad compliant switch ports\n",
port->slave->dev->master->name, port->slave->dev->name);
- __release_rx_machine_lock(port);
return;
}
__update_selected(lacpdu, port);
@@ -1153,7 +1149,6 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
break;
}
}
- __release_rx_machine_lock(port);
}
/**
@@ -2155,6 +2150,12 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
goto re_arm;
}
+ /* Lock around state machines to protect data accessed
+ * by all (e.g., port->sm_vars). ad_rx_machine may run
+ * concurrently due to incoming LACPDU.
+ */
+ __get_state_machine_lock(port);
+
ad_rx_machine(NULL, port);
ad_periodic_machine(port);
ad_port_selection_logic(port);
@@ -2164,6 +2165,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
// turn off the BEGIN bit, since we already handled it
if (port->sm_vars & AD_PORT_BEGIN)
port->sm_vars &= ~AD_PORT_BEGIN;
+
+ __release_state_machine_lock(port);
}
re_arm:
@@ -2200,7 +2203,10 @@ static void bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave, u
case AD_TYPE_LACPDU:
pr_debug("Received LACPDU on port %d\n",
port->actor_port_number);
+ /* Protect against concurrent state machines */
+ __get_state_machine_lock(port);
ad_rx_machine(lacpdu, port);
+ __release_state_machine_lock(port);
break;
case AD_TYPE_MARKER:
diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h
index 2c46a154f2c6..b28baff70864 100644
--- a/drivers/net/bonding/bond_3ad.h
+++ b/drivers/net/bonding/bond_3ad.h
@@ -264,7 +264,8 @@ struct ad_bond_info {
struct ad_slave_info {
struct aggregator aggregator; // 802.3ad aggregator structure
struct port port; // 802.3ad port structure
- spinlock_t rx_machine_lock; // To avoid race condition between callback and receive interrupt
+ spinlock_t state_machine_lock; /* mutex state machines vs.
+ incoming LACPDU */
u16 id;
};
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 7ab534aee452..7513c4523ac4 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -940,7 +940,7 @@ static int mcp251x_open(struct net_device *net)
goto open_unlock;
}
- priv->wq = create_freezeable_workqueue("mcp251x_wq");
+ priv->wq = create_freezable_workqueue("mcp251x_wq");
INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
diff --git a/drivers/net/can/softing/Kconfig b/drivers/net/can/softing/Kconfig
index 8ba81b3ddd90..5de46a9a77bb 100644
--- a/drivers/net/can/softing/Kconfig
+++ b/drivers/net/can/softing/Kconfig
@@ -18,7 +18,7 @@ config CAN_SOFTING
config CAN_SOFTING_CS
tristate "Softing Gmbh CAN pcmcia cards"
depends on PCMCIA
- select CAN_SOFTING
+ depends on CAN_SOFTING
---help---
Support for PCMCIA cards from Softing Gmbh & some cards
from Vector Gmbh.
diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c
index 5157e15e96eb..aeea9f9ff6e8 100644
--- a/drivers/net/can/softing/softing_main.c
+++ b/drivers/net/can/softing/softing_main.c
@@ -633,6 +633,7 @@ static const struct net_device_ops softing_netdev_ops = {
};
static const struct can_bittiming_const softing_btr_const = {
+ .name = "softing",
.tseg1_min = 1,
.tseg1_max = 16,
.tseg2_min = 1,
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 7ff170cbc7dc..302be4aa69d6 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2760,6 +2760,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
u32 status_idx = (u16) *cp->kcq1.status_idx_ptr;
int kcqe_cnt;
+ /* status block index must be read before reading other fields */
+ rmb();
cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
while ((kcqe_cnt = cnic_get_kcqes(dev, &cp->kcq1))) {
@@ -2770,6 +2772,8 @@ static u32 cnic_service_bnx2_queues(struct cnic_dev *dev)
barrier();
if (status_idx != *cp->kcq1.status_idx_ptr) {
status_idx = (u16) *cp->kcq1.status_idx_ptr;
+ /* status block index must be read first */
+ rmb();
cp->kwq_con_idx = *cp->kwq_con_idx_ptr;
} else
break;
@@ -2888,6 +2892,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
u32 last_status = *info->status_idx_ptr;
int kcqe_cnt;
+ /* status block index must be read before reading the KCQ */
+ rmb();
while ((kcqe_cnt = cnic_get_kcqes(dev, info))) {
service_kcqes(dev, kcqe_cnt);
@@ -2898,6 +2904,8 @@ static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info)
break;
last_status = *info->status_idx_ptr;
+ /* status block index must be read before reading the KCQ */
+ rmb();
}
return last_status;
}
@@ -2906,26 +2914,35 @@ static void cnic_service_bnx2x_bh(unsigned long data)
{
struct cnic_dev *dev = (struct cnic_dev *) data;
struct cnic_local *cp = dev->cnic_priv;
- u32 status_idx;
+ u32 status_idx, new_status_idx;
if (unlikely(!test_bit(CNIC_F_CNIC_UP, &dev->flags)))
return;
- status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1);
+ while (1) {
+ status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq1);
- CNIC_WR16(dev, cp->kcq1.io_addr, cp->kcq1.sw_prod_idx + MAX_KCQ_IDX);
+ CNIC_WR16(dev, cp->kcq1.io_addr,
+ cp->kcq1.sw_prod_idx + MAX_KCQ_IDX);
- if (BNX2X_CHIP_IS_E2(cp->chip_id)) {
- status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2);
+ if (!BNX2X_CHIP_IS_E2(cp->chip_id)) {
+ cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID,
+ status_idx, IGU_INT_ENABLE, 1);
+ break;
+ }
+
+ new_status_idx = cnic_service_bnx2x_kcq(dev, &cp->kcq2);
+
+ if (new_status_idx != status_idx)
+ continue;
CNIC_WR16(dev, cp->kcq2.io_addr, cp->kcq2.sw_prod_idx +
MAX_KCQ_IDX);
cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF,
status_idx, IGU_INT_ENABLE, 1);
- } else {
- cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID,
- status_idx, IGU_INT_ENABLE, 1);
+
+ break;
}
}
diff --git a/drivers/net/cxgb4/t4_msg.h b/drivers/net/cxgb4/t4_msg.h
index a550d0c706f3..eb71b8250b91 100644
--- a/drivers/net/cxgb4/t4_msg.h
+++ b/drivers/net/cxgb4/t4_msg.h
@@ -123,6 +123,7 @@ enum {
ULP_MODE_NONE = 0,
ULP_MODE_ISCSI = 2,
ULP_MODE_RDMA = 4,
+ ULP_MODE_TCPDDP = 5,
ULP_MODE_FCOE = 6,
};
diff --git a/drivers/net/cxgb4vf/cxgb4vf_main.c b/drivers/net/cxgb4vf/cxgb4vf_main.c
index 56166ae2059f..6aad64df4dcb 100644
--- a/drivers/net/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/cxgb4vf/cxgb4vf_main.c
@@ -2040,7 +2040,7 @@ static int __devinit setup_debugfs(struct adapter *adapter)
{
int i;
- BUG_ON(adapter->debugfs_root == NULL);
+ BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
/*
* Debugfs support is best effort.
@@ -2061,7 +2061,7 @@ static int __devinit setup_debugfs(struct adapter *adapter)
*/
static void cleanup_debugfs(struct adapter *adapter)
{
- BUG_ON(adapter->debugfs_root == NULL);
+ BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
/*
* Unlike our sister routine cleanup_proc(), we don't need to remove
@@ -2489,17 +2489,6 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
struct net_device *netdev;
/*
- * Vet our module parameters.
- */
- if (msi != MSI_MSIX && msi != MSI_MSI) {
- dev_err(&pdev->dev, "bad module parameter msi=%d; must be %d"
- " (MSI-X or MSI) or %d (MSI)\n", msi, MSI_MSIX,
- MSI_MSI);
- err = -EINVAL;
- goto err_out;
- }
-
- /*
* Print our driver banner the first time we're called to initialize a
* device.
*/
@@ -2711,11 +2700,11 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
/*
* Set up our debugfs entries.
*/
- if (cxgb4vf_debugfs_root) {
+ if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
adapter->debugfs_root =
debugfs_create_dir(pci_name(pdev),
cxgb4vf_debugfs_root);
- if (adapter->debugfs_root == NULL)
+ if (IS_ERR_OR_NULL(adapter->debugfs_root))
dev_warn(&pdev->dev, "could not create debugfs"
" directory");
else
@@ -2770,7 +2759,7 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev,
*/
err_free_debugfs:
- if (adapter->debugfs_root) {
+ if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
cleanup_debugfs(adapter);
debugfs_remove_recursive(adapter->debugfs_root);
}
@@ -2802,7 +2791,6 @@ err_release_regions:
err_disable_device:
pci_disable_device(pdev);
-err_out:
return err;
}
@@ -2840,7 +2828,7 @@ static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
/*
* Tear down our debugfs entries.
*/
- if (adapter->debugfs_root) {
+ if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
cleanup_debugfs(adapter);
debugfs_remove_recursive(adapter->debugfs_root);
}
@@ -2874,6 +2862,46 @@ static void __devexit cxgb4vf_pci_remove(struct pci_dev *pdev)
}
/*
+ * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
+ * delivery.
+ */
+static void __devexit cxgb4vf_pci_shutdown(struct pci_dev *pdev)
+{
+ struct adapter *adapter;
+ int pidx;
+
+ adapter = pci_get_drvdata(pdev);
+ if (!adapter)
+ return;
+
+ /*
+ * Disable all Virtual Interfaces. This will shut down the
+ * delivery of all ingress packets into the chip for these
+ * Virtual Interfaces.
+ */
+ for_each_port(adapter, pidx) {
+ struct net_device *netdev;
+ struct port_info *pi;
+
+ if (!test_bit(pidx, &adapter->registered_device_map))
+ continue;
+
+ netdev = adapter->port[pidx];
+ if (!netdev)
+ continue;
+
+ pi = netdev_priv(netdev);
+ t4vf_enable_vi(adapter, pi->viid, false, false);
+ }
+
+ /*
+ * Free up all Queues which will prevent further DMA and
+ * Interrupts allowing various internal pathways to drain.
+ */
+ t4vf_free_sge_resources(adapter);
+}
+
+/*
* PCI Device registration data structures.
*/
#define CH_DEVICE(devid, idx) \
@@ -2906,6 +2934,7 @@ static struct pci_driver cxgb4vf_driver = {
.id_table = cxgb4vf_pci_tbl,
.probe = cxgb4vf_pci_probe,
.remove = __devexit_p(cxgb4vf_pci_remove),
+ .shutdown = __devexit_p(cxgb4vf_pci_shutdown),
};
/*
@@ -2915,14 +2944,25 @@ static int __init cxgb4vf_module_init(void)
{
int ret;
+ /*
+ * Vet our module parameters.
+ */
+ if (msi != MSI_MSIX && msi != MSI_MSI) {
+ printk(KERN_WARNING KBUILD_MODNAME
+ ": bad module parameter msi=%d; must be %d"
+ " (MSI-X or MSI) or %d (MSI)\n",
+ msi, MSI_MSIX, MSI_MSI);
+ return -EINVAL;
+ }
+
/* Debugfs support is optional, just warn if this fails */
cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
- if (!cxgb4vf_debugfs_root)
+ if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
printk(KERN_WARNING KBUILD_MODNAME ": could not create"
" debugfs entry, continuing\n");
ret = pci_register_driver(&cxgb4vf_driver);
- if (ret < 0)
+ if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
debugfs_remove(cxgb4vf_debugfs_root);
return ret;
}
diff --git a/drivers/net/cxgb4vf/t4vf_hw.c b/drivers/net/cxgb4vf/t4vf_hw.c
index 0f51c80475ce..192db226ec7f 100644
--- a/drivers/net/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/cxgb4vf/t4vf_hw.c
@@ -171,7 +171,7 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
delay_idx = 0;
ms = delay[0];
- for (i = 0; i < 500; i += ms) {
+ for (i = 0; i < FW_CMD_MAX_TIMEOUT; i += ms) {
if (sleep_ok) {
ms = delay[delay_idx];
if (delay_idx < ARRAY_SIZE(delay) - 1)
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 2a628d17d178..7018bfe408a4 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -1008,7 +1008,7 @@ static void emac_rx_handler(void *token, int len, int status)
int ret;
/* free and bail if we are shutting down */
- if (unlikely(!netif_running(ndev))) {
+ if (unlikely(!netif_running(ndev) || !netif_carrier_ok(ndev))) {
dev_kfree_skb_any(skb);
return;
}
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 2d4c4fc1d900..461dd6f905f7 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -802,10 +802,7 @@ dm9000_init_dm9000(struct net_device *dev)
/* Checksum mode */
dm9000_set_rx_csum_unlocked(dev, db->rx_csum);
- /* GPIO0 on pre-activate PHY */
- iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
iow(db, DM9000_GPCR, GPCR_GEP_CNTL); /* Let GPIO0 output */
- iow(db, DM9000_GPR, 0); /* Enable PHY */
ncr = (db->flags & DM9000_PLATF_EXT_PHY) ? NCR_EXT_PHY : 0;
@@ -852,8 +849,8 @@ static void dm9000_timeout(struct net_device *dev)
unsigned long flags;
/* Save previous register address */
- reg_save = readb(db->io_addr);
spin_lock_irqsave(&db->lock, flags);
+ reg_save = readb(db->io_addr);
netif_stop_queue(dev);
dm9000_reset(db);
@@ -1194,6 +1191,10 @@ dm9000_open(struct net_device *dev)
if (request_irq(dev->irq, dm9000_interrupt, irqflags, dev->name, dev))
return -EAGAIN;
+ /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
+ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */
+ mdelay(1); /* delay needs by DM9000B */
+
/* Initialize DM9000 board */
dm9000_reset(db);
dm9000_init_dm9000(dev);
diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c
index 9d8a20b72fa9..8318ea06cb6d 100644
--- a/drivers/net/dnet.c
+++ b/drivers/net/dnet.c
@@ -337,8 +337,6 @@ static int dnet_mii_init(struct dnet *bp)
for (i = 0; i < PHY_MAX_ADDR; i++)
bp->mii_bus->irq[i] = PHY_POLL;
- platform_set_drvdata(bp->dev, bp->mii_bus);
-
if (mdiobus_register(bp->mii_bus)) {
err = -ENXIO;
goto err_out_free_mdio_irq;
@@ -863,6 +861,7 @@ static int __devinit dnet_probe(struct platform_device *pdev)
bp = netdev_priv(dev);
bp->dev = dev;
+ platform_set_drvdata(pdev, dev);
SET_NETDEV_DEV(dev, &pdev->dev);
spin_lock_init(&bp->lock);
diff --git a/drivers/net/e1000/e1000_osdep.h b/drivers/net/e1000/e1000_osdep.h
index 55c1711f1688..33e7c45a4fe4 100644
--- a/drivers/net/e1000/e1000_osdep.h
+++ b/drivers/net/e1000/e1000_osdep.h
@@ -42,7 +42,8 @@
#define GBE_CONFIG_RAM_BASE \
((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET))
-#define GBE_CONFIG_BASE_VIRT phys_to_virt(GBE_CONFIG_RAM_BASE)
+#define GBE_CONFIG_BASE_VIRT \
+ ((void __iomem *)phys_to_virt(GBE_CONFIG_RAM_BASE))
#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \
(iowrite16_rep(base + offset, data, count))
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 3065870cf2a7..6d513a383340 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -937,6 +937,9 @@ static void e1000_print_hw_hang(struct work_struct *work)
u16 phy_status, phy_1000t_status, phy_ext_status;
u16 pci_status;
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
e1e_rphy(hw, PHY_STATUS, &phy_status);
e1e_rphy(hw, PHY_1000T_STATUS, &phy_1000t_status);
e1e_rphy(hw, PHY_EXT_STATUS, &phy_ext_status);
@@ -1506,6 +1509,9 @@ static void e1000e_downshift_workaround(struct work_struct *work)
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter, downshift_task);
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
e1000e_gig_downshift_workaround_ich8lan(&adapter->hw);
}
@@ -3338,6 +3344,21 @@ int e1000e_up(struct e1000_adapter *adapter)
return 0;
}
+static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ if (!(adapter->flags2 & FLAG2_DMA_BURST))
+ return;
+
+ /* flush pending descriptor writebacks to memory */
+ ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
+ ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
+
+ /* execute the writes immediately */
+ e1e_flush();
+}
+
void e1000e_down(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
@@ -3377,6 +3398,9 @@ void e1000e_down(struct e1000_adapter *adapter)
if (!pci_channel_offline(adapter->pdev))
e1000e_reset(adapter);
+
+ e1000e_flush_descriptors(adapter);
+
e1000_clean_tx_ring(adapter);
e1000_clean_rx_ring(adapter);
@@ -3765,6 +3789,10 @@ static void e1000e_update_phy_task(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter, update_phy_task);
+
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
e1000_get_phy_info(&adapter->hw);
}
@@ -3775,6 +3803,10 @@ static void e1000e_update_phy_task(struct work_struct *work)
static void e1000_update_phy_info(unsigned long data)
{
struct e1000_adapter *adapter = (struct e1000_adapter *) data;
+
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
schedule_work(&adapter->update_phy_task);
}
@@ -4149,6 +4181,9 @@ static void e1000_watchdog_task(struct work_struct *work)
u32 link, tctl;
int tx_pending = 0;
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
link = e1000e_has_link(adapter);
if ((netif_carrier_ok(netdev)) && link) {
/* Cancel scheduled suspend requests. */
@@ -4337,19 +4372,12 @@ link_up:
else
ew32(ICS, E1000_ICS_RXDMT0);
+ /* flush pending descriptors to memory before detecting Tx hang */
+ e1000e_flush_descriptors(adapter);
+
/* Force detection of hung controller every watchdog period */
adapter->detect_tx_hung = 1;
- /* flush partial descriptors to memory before detecting Tx hang */
- if (adapter->flags2 & FLAG2_DMA_BURST) {
- ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
- ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
- /*
- * no need to flush the writes because the timeout code does
- * an er32 first thing
- */
- }
-
/*
* With 82571 controllers, LAA may be overwritten due to controller
* reset from the other port. Set the appropriate LAA in RAR[0]
@@ -4887,6 +4915,10 @@ static void e1000_reset_task(struct work_struct *work)
struct e1000_adapter *adapter;
adapter = container_of(work, struct e1000_adapter, reset_task);
+ /* don't run the task if already down */
+ if (test_bit(__E1000_DOWN, &adapter->state))
+ return;
+
if (!((adapter->flags & FLAG_RX_NEEDS_RESTART) &&
(adapter->flags & FLAG_RX_RESTART_NOW))) {
e1000e_dump(adapter);
@@ -5306,7 +5338,7 @@ void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
__e1000e_disable_aspm(pdev, state);
}
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
static bool e1000e_pm_ready(struct e1000_adapter *adapter)
{
return !!adapter->tx_ring->buffer_info;
@@ -5457,7 +5489,7 @@ static int e1000_runtime_resume(struct device *dev)
return __e1000_resume(pdev);
}
#endif /* CONFIG_PM_RUNTIME */
-#endif /* CONFIG_PM_OPS */
+#endif /* CONFIG_PM */
static void e1000_shutdown(struct pci_dev *pdev)
{
@@ -5935,7 +5967,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev,
/* APME bit in EEPROM is mapped to WUC.APME */
eeprom_data = er32(WUC);
eeprom_apme_mask = E1000_WUC_APME;
- if (eeprom_data & E1000_WUC_PHY_WAKE)
+ if ((hw->mac.type > e1000_ich10lan) &&
+ (eeprom_data & E1000_WUC_PHY_WAKE))
adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP;
} else if (adapter->flags & FLAG_APME_IN_CTRL3) {
if (adapter->flags & FLAG_APME_CHECK_PORT_B &&
@@ -6163,7 +6196,7 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = {
};
MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
static const struct dev_pm_ops e1000_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(e1000_suspend, e1000_resume)
SET_RUNTIME_PM_OPS(e1000_runtime_suspend,
@@ -6177,7 +6210,7 @@ static struct pci_driver e1000_driver = {
.id_table = e1000_pci_tbl,
.probe = e1000_probe,
.remove = __devexit_p(e1000_remove),
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
.driver.pm = &e1000_pm_ops,
#endif
.shutdown = e1000_shutdown,
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index b79d7e1555d5..db0290f05bdf 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -1163,15 +1163,11 @@ static int ethoc_resume(struct platform_device *pdev)
# define ethoc_resume NULL
#endif
-#ifdef CONFIG_OF
static struct of_device_id ethoc_match[] = {
- {
- .compatible = "opencores,ethoc",
- },
+ { .compatible = "opencores,ethoc", },
{},
};
MODULE_DEVICE_TABLE(of, ethoc_match);
-#endif
static struct platform_driver ethoc_driver = {
.probe = ethoc_probe,
@@ -1181,9 +1177,7 @@ static struct platform_driver ethoc_driver = {
.driver = {
.name = "ethoc",
.owner = THIS_MODULE,
-#ifdef CONFIG_OF
.of_match_table = ethoc_match,
-#endif
},
};
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 2a71373719ae..cd0282d5d40f 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -74,7 +74,8 @@ static struct platform_device_id fec_devtype[] = {
}, {
.name = "imx28-fec",
.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME,
- }
+ },
+ { }
};
static unsigned char macaddr[ETH_ALEN];
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index af09296ef0dd..9c0b1bac6af6 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -5645,6 +5645,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
goto out_error;
}
+ netif_carrier_off(dev);
+
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
dev->name, np->phy_oui, np->phyaddr, dev->dev_addr);
diff --git a/drivers/net/igbvf/vf.c b/drivers/net/igbvf/vf.c
index 74486a8b009a..af3822f9ea9a 100644
--- a/drivers/net/igbvf/vf.c
+++ b/drivers/net/igbvf/vf.c
@@ -220,7 +220,7 @@ static u32 e1000_hash_mc_addr_vf(struct e1000_hw *hw, u8 *mc_addr)
* The parameter rar_count will usually be hw->mac.rar_entry_count
* unless there are workarounds that change this.
**/
-void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
+static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
u8 *mc_addr_list, u32 mc_addr_count,
u32 rar_used_count, u32 rar_count)
{
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index 8753980668c7..c54a88274d51 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -159,7 +159,7 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
struct scatterlist *sg;
unsigned int i, j, dmacount;
unsigned int len;
- static const unsigned int bufflen = 4096;
+ static const unsigned int bufflen = IXGBE_FCBUFF_MIN;
unsigned int firstoff = 0;
unsigned int lastsize;
unsigned int thisoff = 0;
@@ -254,6 +254,24 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
/* only the last buffer may have non-full bufflen */
lastsize = thisoff + thislen;
+ /*
+ * lastsize can not be buffer len.
+ * If it is then adding another buffer with lastsize = 1.
+ */
+ if (lastsize == bufflen) {
+ if (j >= IXGBE_BUFFCNT_MAX) {
+ e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
+ "not enough user buffers. We need an extra "
+ "buffer because lastsize is bufflen.\n",
+ xid, i, j, dmacount, (u64)addr);
+ goto out_noddp_free;
+ }
+
+ ddp->udl[j] = (u64)(fcoe->extra_ddp_buffer_dma);
+ j++;
+ lastsize = 1;
+ }
+
fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT);
fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT);
fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT);
@@ -532,6 +550,24 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
e_err(drv, "failed to allocated FCoE DDP pool\n");
spin_lock_init(&fcoe->lock);
+
+ /* Extra buffer to be shared by all DDPs for HW work around */
+ fcoe->extra_ddp_buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_ATOMIC);
+ if (fcoe->extra_ddp_buffer == NULL) {
+ e_err(drv, "failed to allocated extra DDP buffer\n");
+ goto out_extra_ddp_buffer_alloc;
+ }
+
+ fcoe->extra_ddp_buffer_dma =
+ dma_map_single(&adapter->pdev->dev,
+ fcoe->extra_ddp_buffer,
+ IXGBE_FCBUFF_MIN,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(&adapter->pdev->dev,
+ fcoe->extra_ddp_buffer_dma)) {
+ e_err(drv, "failed to map extra DDP buffer\n");
+ goto out_extra_ddp_buffer_dma;
+ }
}
/* Enable L2 eth type filter for FCoE */
@@ -581,6 +617,14 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
}
}
#endif
+
+ return;
+
+out_extra_ddp_buffer_dma:
+ kfree(fcoe->extra_ddp_buffer);
+out_extra_ddp_buffer_alloc:
+ pci_pool_destroy(fcoe->pool);
+ fcoe->pool = NULL;
}
/**
@@ -600,6 +644,11 @@ void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter)
if (fcoe->pool) {
for (i = 0; i < IXGBE_FCOE_DDP_MAX; i++)
ixgbe_fcoe_ddp_put(adapter->netdev, i);
+ dma_unmap_single(&adapter->pdev->dev,
+ fcoe->extra_ddp_buffer_dma,
+ IXGBE_FCBUFF_MIN,
+ DMA_FROM_DEVICE);
+ kfree(fcoe->extra_ddp_buffer);
pci_pool_destroy(fcoe->pool);
fcoe->pool = NULL;
}
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.h b/drivers/net/ixgbe/ixgbe_fcoe.h
index 4bc2c551c8db..65cc8fb14fe7 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.h
+++ b/drivers/net/ixgbe/ixgbe_fcoe.h
@@ -70,6 +70,8 @@ struct ixgbe_fcoe {
spinlock_t lock;
struct pci_pool *pool;
struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX];
+ unsigned char *extra_ddp_buffer;
+ dma_addr_t extra_ddp_buffer_dma;
};
#endif /* _IXGBE_FCOE_H */
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index fbae703b46d7..30f9ccfb4f87 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -3728,7 +3728,8 @@ static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter)
* We need to try and force an autonegotiation
* session, then bring up link.
*/
- hw->mac.ops.setup_sfp(hw);
+ if (hw->mac.ops.setup_sfp)
+ hw->mac.ops.setup_sfp(hw);
if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK))
schedule_work(&adapter->multispeed_fiber_task);
} else {
@@ -5968,7 +5969,8 @@ static void ixgbe_sfp_config_module_task(struct work_struct *work)
unregister_netdev(adapter->netdev);
return;
}
- hw->mac.ops.setup_sfp(hw);
+ if (hw->mac.ops.setup_sfp)
+ hw->mac.ops.setup_sfp(hw);
if (!(adapter->flags & IXGBE_FLAG_IN_SFP_LINK_TASK))
/* This will also work for DA Twinax connections */
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index f69e73e2191e..79ccb54ab00c 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -260,7 +260,7 @@ static int macb_mii_init(struct macb *bp)
for (i = 0; i < PHY_MAX_ADDR; i++)
bp->mii_bus->irq[i] = PHY_POLL;
- platform_set_drvdata(bp->dev, bp->mii_bus);
+ dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
if (mdiobus_register(bp->mii_bus))
goto err_out_free_mdio_irq;
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 5933621ac3ff..fc27a9926d9e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -528,8 +528,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
vnet_hdr_len = q->vnet_hdr_sz;
err = -EINVAL;
- if ((len -= vnet_hdr_len) < 0)
+ if (len < vnet_hdr_len)
goto err;
+ len -= vnet_hdr_len;
err = memcpy_fromiovecend((void *)&vnet_hdr, iv, 0,
sizeof(vnet_hdr));
diff --git a/drivers/net/pch_gbe/pch_gbe.h b/drivers/net/pch_gbe/pch_gbe.h
index a0c26a99520f..e1e33c80fb25 100644
--- a/drivers/net/pch_gbe/pch_gbe.h
+++ b/drivers/net/pch_gbe/pch_gbe.h
@@ -73,7 +73,7 @@ struct pch_gbe_regs {
struct pch_gbe_regs_mac_adr mac_adr[16];
u32 ADDR_MASK;
u32 MIIM;
- u32 reserve2;
+ u32 MAC_ADDR_LOAD;
u32 RGMII_ST;
u32 RGMII_CTRL;
u32 reserve3[3];
diff --git a/drivers/net/pch_gbe/pch_gbe_main.c b/drivers/net/pch_gbe/pch_gbe_main.c
index 4c9a7d4f3fca..8c66e22c3a0a 100644
--- a/drivers/net/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/pch_gbe/pch_gbe_main.c
@@ -29,6 +29,7 @@ const char pch_driver_version[] = DRV_VERSION;
#define PCH_GBE_SHORT_PKT 64
#define DSC_INIT16 0xC000
#define PCH_GBE_DMA_ALIGN 0
+#define PCH_GBE_DMA_PADDING 2
#define PCH_GBE_WATCHDOG_PERIOD (1 * HZ) /* watchdog time */
#define PCH_GBE_COPYBREAK_DEFAULT 256
#define PCH_GBE_PCI_BAR 1
@@ -88,6 +89,12 @@ static unsigned int copybreak __read_mostly = PCH_GBE_COPYBREAK_DEFAULT;
static int pch_gbe_mdio_read(struct net_device *netdev, int addr, int reg);
static void pch_gbe_mdio_write(struct net_device *netdev, int addr, int reg,
int data);
+
+inline void pch_gbe_mac_load_mac_addr(struct pch_gbe_hw *hw)
+{
+ iowrite32(0x01, &hw->reg->MAC_ADDR_LOAD);
+}
+
/**
* pch_gbe_mac_read_mac_addr - Read MAC address
* @hw: Pointer to the HW structure
@@ -1365,16 +1372,13 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
struct pch_gbe_buffer *buffer_info;
struct pch_gbe_rx_desc *rx_desc;
u32 length;
- unsigned char tmp_packet[ETH_HLEN];
unsigned int i;
unsigned int cleaned_count = 0;
bool cleaned = false;
- struct sk_buff *skb;
+ struct sk_buff *skb, *new_skb;
u8 dma_status;
u16 gbec_status;
u32 tcp_ip_status;
- u8 skb_copy_flag = 0;
- u8 skb_padding_flag = 0;
i = rx_ring->next_to_clean;
@@ -1418,55 +1422,70 @@ pch_gbe_clean_rx(struct pch_gbe_adapter *adapter,
pr_err("Receive CRC Error\n");
} else {
/* get receive length */
- /* length convert[-3], padding[-2] */
- length = (rx_desc->rx_words_eob) - 3 - 2;
+ /* length convert[-3] */
+ length = (rx_desc->rx_words_eob) - 3;
/* Decide the data conversion method */
if (!adapter->rx_csum) {
/* [Header:14][payload] */
- skb_padding_flag = 0;
- skb_copy_flag = 1;
+ if (NET_IP_ALIGN) {
+ /* Because alignment differs,
+ * the new_skb is newly allocated,
+ * and data is copied to new_skb.*/
+ new_skb = netdev_alloc_skb(netdev,
+ length + NET_IP_ALIGN);
+ if (!new_skb) {
+ /* dorrop error */
+ pr_err("New skb allocation "
+ "Error\n");
+ goto dorrop;
+ }
+ skb_reserve(new_skb, NET_IP_ALIGN);
+ memcpy(new_skb->data, skb->data,
+ length);
+ skb = new_skb;
+ } else {
+ /* DMA buffer is used as SKB as it is.*/
+ buffer_info->skb = NULL;
+ }
} else {
/* [Header:14][padding:2][payload] */
- skb_padding_flag = 1;
- if (length < copybreak)
- skb_copy_flag = 1;
- else
- skb_copy_flag = 0;
- }
-
- /* Data conversion */
- if (skb_copy_flag) { /* recycle skb */
- struct sk_buff *new_skb;
- new_skb =
- netdev_alloc_skb(netdev,
- length + NET_IP_ALIGN);
- if (new_skb) {
- if (!skb_padding_flag) {
- skb_reserve(new_skb,
- NET_IP_ALIGN);
+ /* The length includes padding length */
+ length = length - PCH_GBE_DMA_PADDING;
+ if ((length < copybreak) ||
+ (NET_IP_ALIGN != PCH_GBE_DMA_PADDING)) {
+ /* Because alignment differs,
+ * the new_skb is newly allocated,
+ * and data is copied to new_skb.
+ * Padding data is deleted
+ * at the time of a copy.*/
+ new_skb = netdev_alloc_skb(netdev,
+ length + NET_IP_ALIGN);
+ if (!new_skb) {
+ /* dorrop error */
+ pr_err("New skb allocation "
+ "Error\n");
+ goto dorrop;
}
+ skb_reserve(new_skb, NET_IP_ALIGN);
memcpy(new_skb->data, skb->data,
- length);
- /* save the skb
- * in buffer_info as good */
+ ETH_HLEN);
+ memcpy(&new_skb->data[ETH_HLEN],
+ &skb->data[ETH_HLEN +
+ PCH_GBE_DMA_PADDING],
+ length - ETH_HLEN);
skb = new_skb;
- } else if (!skb_padding_flag) {
- /* dorrop error */
- pr_err("New skb allocation Error\n");
- goto dorrop;
+ } else {
+ /* Padding data is deleted
+ * by moving header data.*/
+ memmove(&skb->data[PCH_GBE_DMA_PADDING],
+ &skb->data[0], ETH_HLEN);
+ skb_reserve(skb, NET_IP_ALIGN);
+ buffer_info->skb = NULL;
}
- } else {
- buffer_info->skb = NULL;
}
- if (skb_padding_flag) {
- memcpy(&tmp_packet[0], &skb->data[0], ETH_HLEN);
- memcpy(&skb->data[NET_IP_ALIGN], &tmp_packet[0],
- ETH_HLEN);
- skb_reserve(skb, NET_IP_ALIGN);
-
- }
-
+ /* The length includes FCS length */
+ length = length - ETH_FCS_LEN;
/* update status of driver */
adapter->stats.rx_bytes += length;
adapter->stats.rx_packets++;
@@ -2318,6 +2337,7 @@ static int pch_gbe_probe(struct pci_dev *pdev,
netdev->features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_GRO;
pch_gbe_set_ethtool_ops(netdev);
+ pch_gbe_mac_load_mac_addr(&adapter->hw);
pch_gbe_mac_reset_hw(&adapter->hw);
/* setup the private structure */
@@ -2426,7 +2446,7 @@ static struct pci_driver pch_gbe_pcidev = {
.id_table = pch_gbe_pcidev_id,
.probe = pch_gbe_probe,
.remove = pch_gbe_remove,
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
.driver.pm = &pch_gbe_pm_ops,
#endif
.shutdown = pch_gbe_shutdown,
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 9226cda4d054..530ab5a10bd3 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -691,6 +691,7 @@ static struct pcmcia_device_id fmvj18x_ids[] = {
PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0105, 0x0e0a),
PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0e01),
PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0a05),
+ PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x0b05),
PCMCIA_PFC_DEVICE_MANF_CARD(0, 0x0032, 0x1101),
PCMCIA_DEVICE_NULL,
};
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 27e6f6d43cac..e3ebd90ae651 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -49,8 +49,8 @@
#include <asm/processor.h>
#define DRV_NAME "r6040"
-#define DRV_VERSION "0.26"
-#define DRV_RELDATE "30May2010"
+#define DRV_VERSION "0.27"
+#define DRV_RELDATE "23Feb2011"
/* PHY CHIP Address */
#define PHY1_ADDR 1 /* For MAC1 */
@@ -69,6 +69,8 @@
/* MAC registers */
#define MCR0 0x00 /* Control register 0 */
+#define MCR0_PROMISC 0x0020 /* Promiscuous mode */
+#define MCR0_HASH_EN 0x0100 /* Enable multicast hash table function */
#define MCR1 0x04 /* Control register 1 */
#define MAC_RST 0x0001 /* Reset the MAC */
#define MBCR 0x08 /* Bus control */
@@ -851,77 +853,92 @@ static void r6040_multicast_list(struct net_device *dev)
{
struct r6040_private *lp = netdev_priv(dev);
void __iomem *ioaddr = lp->base;
- u16 *adrp;
- u16 reg;
unsigned long flags;
struct netdev_hw_addr *ha;
int i;
+ u16 *adrp;
+ u16 hash_table[4] = { 0 };
+
+ spin_lock_irqsave(&lp->lock, flags);
- /* MAC Address */
+ /* Keep our MAC Address */
adrp = (u16 *)dev->dev_addr;
iowrite16(adrp[0], ioaddr + MID_0L);
iowrite16(adrp[1], ioaddr + MID_0M);
iowrite16(adrp[2], ioaddr + MID_0H);
- /* Promiscous Mode */
- spin_lock_irqsave(&lp->lock, flags);
-
/* Clear AMCP & PROM bits */
- reg = ioread16(ioaddr) & ~0x0120;
- if (dev->flags & IFF_PROMISC) {
- reg |= 0x0020;
- lp->mcr0 |= 0x0020;
- }
- /* Too many multicast addresses
- * accept all traffic */
- else if ((netdev_mc_count(dev) > MCAST_MAX) ||
- (dev->flags & IFF_ALLMULTI))
- reg |= 0x0020;
+ lp->mcr0 = ioread16(ioaddr + MCR0) & ~(MCR0_PROMISC | MCR0_HASH_EN);
- iowrite16(reg, ioaddr);
- spin_unlock_irqrestore(&lp->lock, flags);
+ /* Promiscuous mode */
+ if (dev->flags & IFF_PROMISC)
+ lp->mcr0 |= MCR0_PROMISC;
- /* Build the hash table */
- if (netdev_mc_count(dev) > MCAST_MAX) {
- u16 hash_table[4];
- u32 crc;
+ /* Enable multicast hash table function to
+ * receive all multicast packets. */
+ else if (dev->flags & IFF_ALLMULTI) {
+ lp->mcr0 |= MCR0_HASH_EN;
- for (i = 0; i < 4; i++)
- hash_table[i] = 0;
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
+ for (i = 0; i < 4; i++)
+ hash_table[i] = 0xffff;
+ }
+ /* Use internal multicast address registers if the number of
+ * multicast addresses is not greater than MCAST_MAX. */
+ else if (netdev_mc_count(dev) <= MCAST_MAX) {
+ i = 0;
netdev_for_each_mc_addr(ha, dev) {
- char *addrs = ha->addr;
+ u16 *adrp = (u16 *) ha->addr;
+ iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
+ iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
+ iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
+ i++;
+ }
+ while (i < MCAST_MAX) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ i++;
+ }
+ }
+ /* Otherwise, Enable multicast hash table function. */
+ else {
+ u32 crc;
- if (!(*addrs & 1))
- continue;
+ lp->mcr0 |= MCR0_HASH_EN;
+
+ for (i = 0; i < MCAST_MAX ; i++) {
+ iowrite16(0, ioaddr + MID_1L + 8 * i);
+ iowrite16(0, ioaddr + MID_1M + 8 * i);
+ iowrite16(0, ioaddr + MID_1H + 8 * i);
+ }
- crc = ether_crc_le(6, addrs);
+ /* Build multicast hash table */
+ netdev_for_each_mc_addr(ha, dev) {
+ u8 *addrs = ha->addr;
+
+ crc = ether_crc(ETH_ALEN, addrs);
crc >>= 26;
- hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
+ hash_table[crc >> 4] |= 1 << (crc & 0xf);
}
- /* Fill the MAC hash tables with their values */
+ }
+
+ iowrite16(lp->mcr0, ioaddr + MCR0);
+
+ /* Fill the MAC hash tables only if the MCR0_HASH_EN bit is set (bitwise test) */
+ if (lp->mcr0 & MCR0_HASH_EN) {
iowrite16(hash_table[0], ioaddr + MAR0);
iowrite16(hash_table[1], ioaddr + MAR1);
iowrite16(hash_table[2], ioaddr + MAR2);
iowrite16(hash_table[3], ioaddr + MAR3);
}
- /* Multicast Address 1~4 case */
- i = 0;
- netdev_for_each_mc_addr(ha, dev) {
- if (i >= MCAST_MAX)
- break;
- adrp = (u16 *) ha->addr;
- iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
- iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
- iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
- i++;
- }
- while (i < MCAST_MAX) {
- iowrite16(0xffff, ioaddr + MID_1L + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1M + 8 * i);
- iowrite16(0xffff, ioaddr + MID_1H + 8 * i);
- i++;
- }
+
+ spin_unlock_irqrestore(&lp->lock, flags);
}
static void netdev_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 59ccf0c5c610..7ffdb80adf40 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -25,6 +25,7 @@
#include <linux/dma-mapping.h>
#include <linux/pm_runtime.h>
#include <linux/firmware.h>
+#include <linux/pci-aspm.h>
#include <asm/system.h>
#include <asm/io.h>
@@ -617,8 +618,9 @@ static void ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg, u32 data)
}
}
-static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd)
+static void rtl8168_oob_notify(struct rtl8169_private *tp, u8 cmd)
{
+ void __iomem *ioaddr = tp->mmio_addr;
int i;
RTL_W8(ERIDR, cmd);
@@ -630,7 +632,7 @@ static void rtl8168_oob_notify(void __iomem *ioaddr, u8 cmd)
break;
}
- ocp_write(ioaddr, 0x1, 0x30, 0x00000001);
+ ocp_write(tp, 0x1, 0x30, 0x00000001);
}
#define OOB_CMD_RESET 0x00
@@ -2868,8 +2870,11 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
{
void __iomem *ioaddr = tp->mmio_addr;
- if (tp->mac_version == RTL_GIGA_MAC_VER_27)
+ if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
+ (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
return;
+ }
if (((tp->mac_version == RTL_GIGA_MAC_VER_23) ||
(tp->mac_version == RTL_GIGA_MAC_VER_24)) &&
@@ -2891,6 +2896,8 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
+ case RTL_GIGA_MAC_VER_27:
+ case RTL_GIGA_MAC_VER_28:
RTL_W8(PMCH, RTL_R8(PMCH) & ~0x80);
break;
}
@@ -2900,12 +2907,17 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
{
void __iomem *ioaddr = tp->mmio_addr;
- if (tp->mac_version == RTL_GIGA_MAC_VER_27)
+ if (((tp->mac_version == RTL_GIGA_MAC_VER_27) ||
+ (tp->mac_version == RTL_GIGA_MAC_VER_28)) &&
+ (ocp_read(tp, 0x0f, 0x0010) & 0x00008000)) {
return;
+ }
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25:
case RTL_GIGA_MAC_VER_26:
+ case RTL_GIGA_MAC_VER_27:
+ case RTL_GIGA_MAC_VER_28:
RTL_W8(PMCH, RTL_R8(PMCH) | 0x80);
break;
}
@@ -3009,6 +3021,11 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
mii->reg_num_mask = 0x1f;
mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII);
+ /* disable ASPM completely, as it causes random device failures
+ * as well as full system hangs for some users of PCIe devices */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
+ PCIE_LINK_STATE_CLKPM);
+
/* enable device (incl. PCI PM wakeup and hotplug setup) */
rc = pci_enable_device(pdev);
if (rc < 0) {
@@ -3042,7 +3059,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_mwi_2;
}
- tp->cp_cmd = PCIMulRW | RxChkSum;
+ tp->cp_cmd = RxChkSum;
if ((sizeof(dma_addr_t) > 4) &&
!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && use_dac) {
@@ -3190,6 +3207,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pci_dev_run_wake(pdev))
pm_runtime_put_noidle(&pdev->dev);
+ netif_carrier_off(dev);
+
out:
return rc;
@@ -3316,7 +3335,8 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
/* Disable interrupts */
rtl8169_irq_mask_and_ack(ioaddr);
- if (tp->mac_version == RTL_GIGA_MAC_VER_28) {
+ if (tp->mac_version == RTL_GIGA_MAC_VER_27 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_28) {
while (RTL_R8(TxPoll) & NPQ)
udelay(20);
@@ -3845,8 +3865,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
Cxpl_dbg_sel | \
ASF | \
PktCntrDisable | \
- PCIDAC | \
- PCIMulRW)
+ Mac_dbgo_sel)
static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
{
@@ -3876,8 +3895,6 @@ static void rtl_hw_start_8102e_1(void __iomem *ioaddr, struct pci_dev *pdev)
if ((cfg1 & LEDS0) && (cfg1 & LEDS1))
RTL_W8(Config1, cfg1 & ~LEDS0);
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
-
rtl_ephy_init(ioaddr, e_info_8102e_1, ARRAY_SIZE(e_info_8102e_1));
}
@@ -3889,8 +3906,6 @@ static void rtl_hw_start_8102e_2(void __iomem *ioaddr, struct pci_dev *pdev)
RTL_W8(Config1, MEMMAP | IOMAP | VPD | PMEnable);
RTL_W8(Config3, RTL_R8(Config3) & ~Beacon_en);
-
- RTL_W16(CPlusCmd, RTL_R16(CPlusCmd) & ~R810X_CPCMD_QUIRK_MASK);
}
static void rtl_hw_start_8102e_3(void __iomem *ioaddr, struct pci_dev *pdev)
@@ -3916,6 +3931,8 @@ static void rtl_hw_start_8101(struct net_device *dev)
}
}
+ RTL_W8(Cfg9346, Cfg9346_Unlock);
+
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_07:
rtl_hw_start_8102e_1(ioaddr, pdev);
@@ -3930,14 +3947,13 @@ static void rtl_hw_start_8101(struct net_device *dev)
break;
}
- RTL_W8(Cfg9346, Cfg9346_Unlock);
+ RTL_W8(Cfg9346, Cfg9346_Lock);
RTL_W8(MaxTxPacketSize, TxPacketMax);
rtl_set_rx_max_size(ioaddr, rx_buf_sz);
- tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
-
+ tp->cp_cmd &= ~R810X_CPCMD_QUIRK_MASK;
RTL_W16(CPlusCmd, tp->cp_cmd);
RTL_W16(IntrMitigate, 0x0000);
@@ -3947,14 +3963,10 @@ static void rtl_hw_start_8101(struct net_device *dev)
RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
rtl_set_rx_tx_config_registers(tp);
- RTL_W8(Cfg9346, Cfg9346_Lock);
-
RTL_R8(IntrMask);
rtl_set_rx_mode(dev);
- RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb);
-
RTL_W16(MultiIntr, RTL_R16(MultiIntr) & 0xf000);
RTL_W16(IntrMask, tp->intr_event);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 0e8bb19ed60d..ca886d98bdc7 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -569,9 +569,14 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
struct ethtool_test *test, u64 *data)
{
struct efx_nic *efx = netdev_priv(net_dev);
- struct efx_self_tests efx_tests;
+ struct efx_self_tests *efx_tests;
int already_up;
- int rc;
+ int rc = -ENOMEM;
+
+ efx_tests = kzalloc(sizeof(*efx_tests), GFP_KERNEL);
+ if (!efx_tests)
+ goto fail;
+
ASSERT_RTNL();
if (efx->state != STATE_RUNNING) {
@@ -589,13 +594,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
if (rc) {
netif_err(efx, drv, efx->net_dev,
"failed opening device.\n");
- goto fail2;
+ goto fail1;
}
}
- memset(&efx_tests, 0, sizeof(efx_tests));
-
- rc = efx_selftest(efx, &efx_tests, test->flags);
+ rc = efx_selftest(efx, efx_tests, test->flags);
if (!already_up)
dev_close(efx->net_dev);
@@ -604,10 +607,11 @@ static void efx_ethtool_self_test(struct net_device *net_dev,
rc == 0 ? "passed" : "failed",
(test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
- fail2:
- fail1:
+fail1:
/* Fill ethtool results structures */
- efx_ethtool_fill_self_tests(efx, &efx_tests, NULL, data);
+ efx_ethtool_fill_self_tests(efx, efx_tests, NULL, data);
+ kfree(efx_tests);
+fail:
if (rc)
test->flags |= ETH_TEST_FL_FAILED;
}
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 42daf98ba736..35b28f42d208 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3856,9 +3856,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
memcpy_fromio(dev->dev_addr, hw->regs + B2_MAC_1 + port*8, ETH_ALEN);
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
- /* device is off until link detection */
- netif_carrier_off(dev);
-
return dev;
}
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 64bfdae5956f..d70bde95460b 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1178,6 +1178,11 @@ static int smsc911x_open(struct net_device *dev)
smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740);
+ /* Increase the legal frame size of VLAN tagged frames to 1522 bytes */
+ spin_lock_irq(&pdata->mac_lock);
+ smsc911x_mac_write(pdata, VLAN1, ETH_P_8021Q);
+ spin_unlock_irq(&pdata->mac_lock);
+
/* Make sure EEPROM has finished loading before setting GPIO_CFG */
timeout = 50;
while ((smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) &&
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index 34a0af3837f9..0e5f03135b50 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1560,8 +1560,10 @@ static int stmmac_mac_device_setup(struct net_device *dev)
priv->hw = device;
- if (device_can_wakeup(priv->device))
+ if (device_can_wakeup(priv->device)) {
priv->wolopts = WAKE_MAGIC; /* Magic Frame as default */
+ enable_irq_wake(dev->irq);
+ }
return 0;
}
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 93b32d366611..06c0e5033656 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11158,7 +11158,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
break; /* We have no PHY */
- if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)
+ if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
+ ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
+ !netif_running(dev)))
return -EAGAIN;
spin_lock_bh(&tp->lock);
@@ -11174,7 +11176,9 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
break; /* We have no PHY */
- if (tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER)
+ if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
+ ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
+ !netif_running(dev)))
return -EAGAIN;
spin_lock_bh(&tp->lock);
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 02b622e3b9fb..5002f5be47be 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -651,6 +651,10 @@ static const struct usb_device_id products[] = {
.driver_info = (unsigned long)&dm9601_info,
},
{
+ USB_DEVICE(0x0fe6, 0x9700), /* DM9601 USB to Fast Ethernet Adapter */
+ .driver_info = (unsigned long)&dm9601_info,
+ },
+ {
USB_DEVICE(0x0a46, 0x9000), /* DM9000E */
.driver_info = (unsigned long)&dm9601_info,
},
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index bed8fcedff49..6d83812603b6 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2628,15 +2628,15 @@ exit:
static void hso_free_tiomget(struct hso_serial *serial)
{
- struct hso_tiocmget *tiocmget = serial->tiocmget;
+ struct hso_tiocmget *tiocmget;
+ if (!serial)
+ return;
+ tiocmget = serial->tiocmget;
if (tiocmget) {
- if (tiocmget->urb) {
- usb_free_urb(tiocmget->urb);
- tiocmget->urb = NULL;
- }
+ usb_free_urb(tiocmget->urb);
+ tiocmget->urb = NULL;
serial->tiocmget = NULL;
kfree(tiocmget);
-
}
}
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index ed9a41643ff4..95c41d56631c 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -931,8 +931,10 @@ fail_halt:
if (urb != NULL) {
clear_bit (EVENT_RX_MEMORY, &dev->flags);
status = usb_autopm_get_interface(dev->intf);
- if (status < 0)
+ if (status < 0) {
+ usb_free_urb(urb);
goto fail_lowmem;
+ }
if (rx_submit (dev, urb, GFP_KERNEL) == -ENOLINK)
resched = 0;
usb_autopm_put_interface(dev->intf);
diff --git a/drivers/net/wireless/ath/ath5k/phy.c b/drivers/net/wireless/ath/ath5k/phy.c
index 78c26fdccad1..62ce2f4e8605 100644
--- a/drivers/net/wireless/ath/ath5k/phy.c
+++ b/drivers/net/wireless/ath/ath5k/phy.c
@@ -282,6 +282,34 @@ int ath5k_hw_phy_disable(struct ath5k_hw *ah)
return 0;
}
+/*
+ * Wait for synth to settle
+ */
+static void ath5k_hw_wait_for_synth(struct ath5k_hw *ah,
+ struct ieee80211_channel *channel)
+{
+ /*
+ * On 5211+ read activation -> rx delay
+ * and use it (100ns steps).
+ */
+ if (ah->ah_version != AR5K_AR5210) {
+ u32 delay;
+ delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) &
+ AR5K_PHY_RX_DELAY_M;
+ delay = (channel->hw_value & CHANNEL_CCK) ?
+ ((delay << 2) / 22) : (delay / 10);
+ if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
+ delay = delay << 1;
+ if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
+ delay = delay << 2;
+ /* XXX: /2 on turbo ? Let's be safe
+ * for now */
+ udelay(100 + delay);
+ } else {
+ mdelay(1);
+ }
+}
+
/**********************\
* RF Gain optimization *
@@ -1253,6 +1281,7 @@ static int ath5k_hw_channel(struct ath5k_hw *ah,
case AR5K_RF5111:
ret = ath5k_hw_rf5111_channel(ah, channel);
break;
+ case AR5K_RF2317:
case AR5K_RF2425:
ret = ath5k_hw_rf2425_channel(ah, channel);
break;
@@ -3237,6 +3266,13 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
/* Failed */
if (i >= 100)
return -EIO;
+
+ /* Set channel and wait for synth */
+ ret = ath5k_hw_channel(ah, channel);
+ if (ret)
+ return ret;
+
+ ath5k_hw_wait_for_synth(ah, channel);
}
/*
@@ -3251,13 +3287,53 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
if (ret)
return ret;
+ /* Write OFDM timings on 5212*/
+ if (ah->ah_version == AR5K_AR5212 &&
+ channel->hw_value & CHANNEL_OFDM) {
+
+ ret = ath5k_hw_write_ofdm_timings(ah, channel);
+ if (ret)
+ return ret;
+
+ /* Spur info is available only from EEPROM versions
+ * greater than 5.3, but the EEPROM routines will use
+ * static values for older versions */
+ if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
+ ath5k_hw_set_spur_mitigation_filter(ah,
+ channel);
+ }
+
+ /* If we used fast channel switching
+ * we are done, release RF bus and
+ * fire up NF calibration.
+ *
+ * Note: Only NF calibration due to
+ * channel change, not AGC calibration
+ * since AGC is still running !
+ */
+ if (fast) {
+ /*
+ * Release RF Bus grant
+ */
+ AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
+ AR5K_PHY_RFBUS_REQ_REQUEST);
+
+ /*
+ * Start NF calibration
+ */
+ AR5K_REG_ENABLE_BITS(ah, AR5K_PHY_AGCCTL,
+ AR5K_PHY_AGCCTL_NF);
+
+ return ret;
+ }
+
/*
* For 5210 we do all initialization using
* initvals, so we don't have to modify
* any settings (5210 also only supports
* a/aturbo modes)
*/
- if ((ah->ah_version != AR5K_AR5210) && !fast) {
+ if (ah->ah_version != AR5K_AR5210) {
/*
* Write initial RF gain settings
@@ -3276,22 +3352,6 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
if (ret)
return ret;
- /* Write OFDM timings on 5212*/
- if (ah->ah_version == AR5K_AR5212 &&
- channel->hw_value & CHANNEL_OFDM) {
-
- ret = ath5k_hw_write_ofdm_timings(ah, channel);
- if (ret)
- return ret;
-
- /* Spur info is available only from EEPROM versions
- * greater than 5.3, but the EEPROM routines will use
- * static values for older versions */
- if (ah->ah_mac_srev >= AR5K_SREV_AR5424)
- ath5k_hw_set_spur_mitigation_filter(ah,
- channel);
- }
-
/*Enable/disable 802.11b mode on 5111
(enable 2111 frequency converter + CCK)*/
if (ah->ah_radio == AR5K_RF5111) {
@@ -3322,47 +3382,20 @@ int ath5k_hw_phy_init(struct ath5k_hw *ah, struct ieee80211_channel *channel,
*/
ath5k_hw_reg_write(ah, AR5K_PHY_ACT_ENABLE, AR5K_PHY_ACT);
+ ath5k_hw_wait_for_synth(ah, channel);
+
/*
- * On 5211+ read activation -> rx delay
- * and use it.
+ * Perform ADC test to see if baseband is ready
+ * Set tx hold and check adc test register
*/
- if (ah->ah_version != AR5K_AR5210) {
- u32 delay;
- delay = ath5k_hw_reg_read(ah, AR5K_PHY_RX_DELAY) &
- AR5K_PHY_RX_DELAY_M;
- delay = (channel->hw_value & CHANNEL_CCK) ?
- ((delay << 2) / 22) : (delay / 10);
- if (ah->ah_bwmode == AR5K_BWMODE_10MHZ)
- delay = delay << 1;
- if (ah->ah_bwmode == AR5K_BWMODE_5MHZ)
- delay = delay << 2;
- /* XXX: /2 on turbo ? Let's be safe
- * for now */
- udelay(100 + delay);
- } else {
- mdelay(1);
- }
-
- if (fast)
- /*
- * Release RF Bus grant
- */
- AR5K_REG_DISABLE_BITS(ah, AR5K_PHY_RFBUS_REQ,
- AR5K_PHY_RFBUS_REQ_REQUEST);
- else {
- /*
- * Perform ADC test to see if baseband is ready
- * Set tx hold and check adc test register
- */
- phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
- ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
- for (i = 0; i <= 20; i++) {
- if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
- break;
- udelay(200);
- }
- ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
+ phy_tst1 = ath5k_hw_reg_read(ah, AR5K_PHY_TST1);
+ ath5k_hw_reg_write(ah, AR5K_PHY_TST1_TXHOLD, AR5K_PHY_TST1);
+ for (i = 0; i <= 20; i++) {
+ if (!(ath5k_hw_reg_read(ah, AR5K_PHY_ADC_TEST) & 0x10))
+ break;
+ udelay(200);
}
+ ath5k_hw_reg_write(ah, phy_tst1, AR5K_PHY_TST1);
/*
* Start automatic gain control calibration
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index 23838e37d45f..1a7fa6ea4cf5 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -21,7 +21,6 @@
#include <linux/device.h>
#include <linux/leds.h>
#include <linux/completion.h>
-#include <linux/pm_qos_params.h>
#include "debug.h"
#include "common.h"
@@ -57,8 +56,6 @@ struct ath_node;
#define A_MAX(a, b) ((a) > (b) ? (a) : (b))
-#define ATH9K_PM_QOS_DEFAULT_VALUE 55
-
#define TSF_TO_TU(_h,_l) \
((((u32)(_h)) << 22) | (((u32)(_l)) >> 10))
@@ -633,8 +630,6 @@ struct ath_softc {
struct ath_descdma txsdma;
struct ath_ant_comb ant_comb;
-
- struct pm_qos_request_list pm_qos_req;
};
struct ath_wiphy {
@@ -666,7 +661,6 @@ static inline void ath_read_cachesize(struct ath_common *common, int *csz)
extern struct ieee80211_ops ath9k_ops;
extern int ath9k_modparam_nohwcrypt;
extern int led_blink;
-extern int ath9k_pm_qos_value;
extern bool is_ath9k_unloaded;
irqreturn_t ath_isr(int irq, void *dev);
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 5ab3084eb9cb..07b1633b7f3f 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -219,8 +219,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
struct tx_buf *tx_buf = NULL;
struct sk_buff *nskb = NULL;
int ret = 0, i;
- u16 *hdr, tx_skb_cnt = 0;
+ u16 tx_skb_cnt = 0;
u8 *buf;
+ __le16 *hdr;
if (hif_dev->tx.tx_skb_cnt == 0)
return 0;
@@ -245,9 +246,9 @@ static int __hif_usb_tx(struct hif_device_usb *hif_dev)
buf = tx_buf->buf;
buf += tx_buf->offset;
- hdr = (u16 *)buf;
- *hdr++ = nskb->len;
- *hdr++ = ATH_USB_TX_STREAM_MODE_TAG;
+ hdr = (__le16 *)buf;
+ *hdr++ = cpu_to_le16(nskb->len);
+ *hdr++ = cpu_to_le16(ATH_USB_TX_STREAM_MODE_TAG);
buf += 4;
memcpy(buf, nskb->data, nskb->len);
tx_buf->len = nskb->len + 4;
diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 087a6a95edd5..a033d01bf8a0 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -41,10 +41,6 @@ static int ath9k_btcoex_enable;
module_param_named(btcoex_enable, ath9k_btcoex_enable, int, 0444);
MODULE_PARM_DESC(btcoex_enable, "Enable wifi-BT coexistence");
-int ath9k_pm_qos_value = ATH9K_PM_QOS_DEFAULT_VALUE;
-module_param_named(pmqos, ath9k_pm_qos_value, int, S_IRUSR | S_IRGRP | S_IROTH);
-MODULE_PARM_DESC(pmqos, "User specified PM-QOS value");
-
bool is_ath9k_unloaded;
/* We use the hw_value as an index into our private channel structure */
@@ -762,9 +758,6 @@ int ath9k_init_device(u16 devid, struct ath_softc *sc, u16 subsysid,
ath_init_leds(sc);
ath_start_rfkill_poll(sc);
- pm_qos_add_request(&sc->pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
- PM_QOS_DEFAULT_VALUE);
-
return 0;
error_world:
@@ -831,7 +824,6 @@ void ath9k_deinit_device(struct ath_softc *sc)
}
ieee80211_unregister_hw(hw);
- pm_qos_remove_request(&sc->pm_qos_req);
ath_rx_cleanup(sc);
ath_tx_cleanup(sc);
ath9k_deinit_softc(sc);
diff --git a/drivers/net/wireless/ath/ath9k/mac.c b/drivers/net/wireless/ath/ath9k/mac.c
index 180170d3ce25..2915b11edefb 100644
--- a/drivers/net/wireless/ath/ath9k/mac.c
+++ b/drivers/net/wireless/ath/ath9k/mac.c
@@ -885,7 +885,7 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
struct ath_common *common = ath9k_hw_common(ah);
if (!(ints & ATH9K_INT_GLOBAL))
- ath9k_hw_enable_interrupts(ah);
+ ath9k_hw_disable_interrupts(ah);
ath_dbg(common, ATH_DBG_INTERRUPT, "0x%x => 0x%x\n", omask, ints);
@@ -963,7 +963,8 @@ void ath9k_hw_set_interrupts(struct ath_hw *ah, enum ath9k_int ints)
REG_CLR_BIT(ah, AR_IMR_S5, AR_IMR_S5_TIM_TIMER);
}
- ath9k_hw_enable_interrupts(ah);
+ if (ints & ATH9K_INT_GLOBAL)
+ ath9k_hw_enable_interrupts(ah);
return;
}
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index da5c64597c1f..a09d15f7aa6e 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1173,12 +1173,6 @@ static int ath9k_start(struct ieee80211_hw *hw)
ath9k_btcoex_timer_resume(sc);
}
- /* User has the option to provide pm-qos value as a module
- * parameter rather than using the default value of
- * 'ATH9K_PM_QOS_DEFAULT_VALUE'.
- */
- pm_qos_update_request(&sc->pm_qos_req, ath9k_pm_qos_value);
-
if (ah->caps.pcie_lcr_extsync_en && common->bus_ops->extn_synch_en)
common->bus_ops->extn_synch_en(common);
@@ -1345,8 +1339,6 @@ static void ath9k_stop(struct ieee80211_hw *hw)
sc->sc_flags |= SC_OP_INVALID;
- pm_qos_update_request(&sc->pm_qos_req, PM_QOS_DEFAULT_VALUE);
-
mutex_unlock(&sc->mutex);
ath_dbg(common, ATH_DBG_CONFIG, "Driver halt\n");
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 537732e5964f..f82c400be288 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -118,6 +118,8 @@ static struct usb_device_id carl9170_usb_ids[] = {
{ USB_DEVICE(0x057c, 0x8402) },
/* Qwest/Actiontec 802AIN Wireless N USB Network Adapter */
{ USB_DEVICE(0x1668, 0x1200) },
+ /* Airlive X.USB a/b/g/n */
+ { USB_DEVICE(0x1b75, 0x9170) },
/* terminate */
{}
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 61915f371416..471a52a2f8d4 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -706,11 +706,10 @@ static void schedule_reset(struct ipw2100_priv *priv)
netif_stop_queue(priv->net_dev);
priv->status |= STATUS_RESET_PENDING;
if (priv->reset_backoff)
- queue_delayed_work(priv->workqueue, &priv->reset_work,
- priv->reset_backoff * HZ);
+ schedule_delayed_work(&priv->reset_work,
+ priv->reset_backoff * HZ);
else
- queue_delayed_work(priv->workqueue, &priv->reset_work,
- 0);
+ schedule_delayed_work(&priv->reset_work, 0);
if (priv->reset_backoff < MAX_RESET_BACKOFF)
priv->reset_backoff++;
@@ -1474,7 +1473,7 @@ static int ipw2100_enable_adapter(struct ipw2100_priv *priv)
if (priv->stop_hang_check) {
priv->stop_hang_check = 0;
- queue_delayed_work(priv->workqueue, &priv->hang_check, HZ / 2);
+ schedule_delayed_work(&priv->hang_check, HZ / 2);
}
fail_up:
@@ -1808,8 +1807,8 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
if (priv->stop_rf_kill) {
priv->stop_rf_kill = 0;
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- round_jiffies_relative(HZ));
+ schedule_delayed_work(&priv->rf_kill,
+ round_jiffies_relative(HZ));
}
deferred = 1;
@@ -2086,7 +2085,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status)
priv->status |= STATUS_ASSOCIATING;
priv->connect_start = get_seconds();
- queue_delayed_work(priv->workqueue, &priv->wx_event_work, HZ / 10);
+ schedule_delayed_work(&priv->wx_event_work, HZ / 10);
}
static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid,
@@ -2166,9 +2165,9 @@ static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status)
return;
if (priv->status & STATUS_SECURITY_UPDATED)
- queue_delayed_work(priv->workqueue, &priv->security_work, 0);
+ schedule_delayed_work(&priv->security_work, 0);
- queue_delayed_work(priv->workqueue, &priv->wx_event_work, 0);
+ schedule_delayed_work(&priv->wx_event_work, 0);
}
static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
@@ -2183,8 +2182,7 @@ static void isr_indicate_rf_kill(struct ipw2100_priv *priv, u32 status)
/* Make sure the RF Kill check timer is running */
priv->stop_rf_kill = 0;
cancel_delayed_work(&priv->rf_kill);
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- round_jiffies_relative(HZ));
+ schedule_delayed_work(&priv->rf_kill, round_jiffies_relative(HZ));
}
static void send_scan_event(void *data)
@@ -2219,13 +2217,12 @@ static void isr_scan_complete(struct ipw2100_priv *priv, u32 status)
/* Only userspace-requested scan completion events go out immediately */
if (!priv->user_requested_scan) {
if (!delayed_work_pending(&priv->scan_event_later))
- queue_delayed_work(priv->workqueue,
- &priv->scan_event_later,
- round_jiffies_relative(msecs_to_jiffies(4000)));
+ schedule_delayed_work(&priv->scan_event_later,
+ round_jiffies_relative(msecs_to_jiffies(4000)));
} else {
priv->user_requested_scan = 0;
cancel_delayed_work(&priv->scan_event_later);
- queue_work(priv->workqueue, &priv->scan_event_now);
+ schedule_work(&priv->scan_event_now);
}
}
@@ -4329,8 +4326,8 @@ static int ipw_radio_kill_sw(struct ipw2100_priv *priv, int disable_radio)
/* Make sure the RF_KILL check timer is running */
priv->stop_rf_kill = 0;
cancel_delayed_work(&priv->rf_kill);
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- round_jiffies_relative(HZ));
+ schedule_delayed_work(&priv->rf_kill,
+ round_jiffies_relative(HZ));
} else
schedule_reset(priv);
}
@@ -4461,20 +4458,17 @@ static void bd_queue_initialize(struct ipw2100_priv *priv,
IPW_DEBUG_INFO("exit\n");
}
-static void ipw2100_kill_workqueue(struct ipw2100_priv *priv)
+static void ipw2100_kill_works(struct ipw2100_priv *priv)
{
- if (priv->workqueue) {
- priv->stop_rf_kill = 1;
- priv->stop_hang_check = 1;
- cancel_delayed_work(&priv->reset_work);
- cancel_delayed_work(&priv->security_work);
- cancel_delayed_work(&priv->wx_event_work);
- cancel_delayed_work(&priv->hang_check);
- cancel_delayed_work(&priv->rf_kill);
- cancel_delayed_work(&priv->scan_event_later);
- destroy_workqueue(priv->workqueue);
- priv->workqueue = NULL;
- }
+ priv->stop_rf_kill = 1;
+ priv->stop_hang_check = 1;
+ cancel_delayed_work_sync(&priv->reset_work);
+ cancel_delayed_work_sync(&priv->security_work);
+ cancel_delayed_work_sync(&priv->wx_event_work);
+ cancel_delayed_work_sync(&priv->hang_check);
+ cancel_delayed_work_sync(&priv->rf_kill);
+ cancel_work_sync(&priv->scan_event_now);
+ cancel_delayed_work_sync(&priv->scan_event_later);
}
static int ipw2100_tx_allocate(struct ipw2100_priv *priv)
@@ -6046,7 +6040,7 @@ static void ipw2100_hang_check(struct work_struct *work)
priv->last_rtc = rtc;
if (!priv->stop_hang_check)
- queue_delayed_work(priv->workqueue, &priv->hang_check, HZ / 2);
+ schedule_delayed_work(&priv->hang_check, HZ / 2);
spin_unlock_irqrestore(&priv->low_lock, flags);
}
@@ -6062,8 +6056,8 @@ static void ipw2100_rf_kill(struct work_struct *work)
if (rf_kill_active(priv)) {
IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n");
if (!priv->stop_rf_kill)
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- round_jiffies_relative(HZ));
+ schedule_delayed_work(&priv->rf_kill,
+ round_jiffies_relative(HZ));
goto exit_unlock;
}
@@ -6209,8 +6203,6 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev,
INIT_LIST_HEAD(&priv->fw_pend_list);
INIT_STAT(&priv->fw_pend_stat);
- priv->workqueue = create_workqueue(DRV_NAME);
-
INIT_DELAYED_WORK(&priv->reset_work, ipw2100_reset_adapter);
INIT_DELAYED_WORK(&priv->security_work, ipw2100_security_work);
INIT_DELAYED_WORK(&priv->wx_event_work, ipw2100_wx_event_work);
@@ -6410,7 +6402,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev,
if (dev->irq)
free_irq(dev->irq, priv);
- ipw2100_kill_workqueue(priv);
+ ipw2100_kill_works(priv);
/* These are safe to call even if they weren't allocated */
ipw2100_queues_free(priv);
@@ -6460,9 +6452,7 @@ static void __devexit ipw2100_pci_remove_one(struct pci_dev *pci_dev)
* first, then close() will crash. */
unregister_netdev(dev);
- /* ipw2100_down will ensure that there is no more pending work
- * in the workqueue's, so we can safely remove them now. */
- ipw2100_kill_workqueue(priv);
+ ipw2100_kill_works(priv);
ipw2100_queues_free(priv);
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.h b/drivers/net/wireless/ipw2x00/ipw2100.h
index 838002b4881e..99cba968aa58 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.h
+++ b/drivers/net/wireless/ipw2x00/ipw2100.h
@@ -580,7 +580,6 @@ struct ipw2100_priv {
struct tasklet_struct irq_tasklet;
- struct workqueue_struct *workqueue;
struct delayed_work reset_work;
struct delayed_work security_work;
struct delayed_work wx_event_work;
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index ae438ed80c2f..160881f234cc 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -894,9 +894,8 @@ static void ipw_led_link_on(struct ipw_priv *priv)
/* If we aren't associated, schedule turning the LED off */
if (!(priv->status & STATUS_ASSOCIATED))
- queue_delayed_work(priv->workqueue,
- &priv->led_link_off,
- LD_TIME_LINK_ON);
+ schedule_delayed_work(&priv->led_link_off,
+ LD_TIME_LINK_ON);
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -939,8 +938,8 @@ static void ipw_led_link_off(struct ipw_priv *priv)
* turning the LED on (blink while unassociated) */
if (!(priv->status & STATUS_RF_KILL_MASK) &&
!(priv->status & STATUS_ASSOCIATED))
- queue_delayed_work(priv->workqueue, &priv->led_link_on,
- LD_TIME_LINK_OFF);
+ schedule_delayed_work(&priv->led_link_on,
+ LD_TIME_LINK_OFF);
}
@@ -980,13 +979,11 @@ static void __ipw_led_activity_on(struct ipw_priv *priv)
priv->status |= STATUS_LED_ACT_ON;
cancel_delayed_work(&priv->led_act_off);
- queue_delayed_work(priv->workqueue, &priv->led_act_off,
- LD_TIME_ACT_ON);
+ schedule_delayed_work(&priv->led_act_off, LD_TIME_ACT_ON);
} else {
/* Reschedule LED off for full time period */
cancel_delayed_work(&priv->led_act_off);
- queue_delayed_work(priv->workqueue, &priv->led_act_off,
- LD_TIME_ACT_ON);
+ schedule_delayed_work(&priv->led_act_off, LD_TIME_ACT_ON);
}
}
@@ -1795,13 +1792,11 @@ static int ipw_radio_kill_sw(struct ipw_priv *priv, int disable_radio)
if (disable_radio) {
priv->status |= STATUS_RF_KILL_SW;
- if (priv->workqueue) {
- cancel_delayed_work(&priv->request_scan);
- cancel_delayed_work(&priv->request_direct_scan);
- cancel_delayed_work(&priv->request_passive_scan);
- cancel_delayed_work(&priv->scan_event);
- }
- queue_work(priv->workqueue, &priv->down);
+ cancel_delayed_work(&priv->request_scan);
+ cancel_delayed_work(&priv->request_direct_scan);
+ cancel_delayed_work(&priv->request_passive_scan);
+ cancel_delayed_work(&priv->scan_event);
+ schedule_work(&priv->down);
} else {
priv->status &= ~STATUS_RF_KILL_SW;
if (rf_kill_active(priv)) {
@@ -1809,10 +1804,10 @@ static int ipw_radio_kill_sw(struct ipw_priv *priv, int disable_radio)
"disabled by HW switch\n");
/* Make sure the RF_KILL check timer is running */
cancel_delayed_work(&priv->rf_kill);
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- round_jiffies_relative(2 * HZ));
+ schedule_delayed_work(&priv->rf_kill,
+ round_jiffies_relative(2 * HZ));
} else
- queue_work(priv->workqueue, &priv->up);
+ schedule_work(&priv->up);
}
return 1;
@@ -2063,7 +2058,7 @@ static void ipw_irq_tasklet(struct ipw_priv *priv)
cancel_delayed_work(&priv->request_passive_scan);
cancel_delayed_work(&priv->scan_event);
schedule_work(&priv->link_down);
- queue_delayed_work(priv->workqueue, &priv->rf_kill, 2 * HZ);
+ schedule_delayed_work(&priv->rf_kill, 2 * HZ);
handled |= IPW_INTA_BIT_RF_KILL_DONE;
}
@@ -2103,7 +2098,7 @@ static void ipw_irq_tasklet(struct ipw_priv *priv)
priv->status &= ~STATUS_HCMD_ACTIVE;
wake_up_interruptible(&priv->wait_command_queue);
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
handled |= IPW_INTA_BIT_FATAL_ERROR;
}
@@ -2323,11 +2318,6 @@ static int ipw_send_adapter_address(struct ipw_priv *priv, u8 * mac)
return ipw_send_cmd_pdu(priv, IPW_CMD_ADAPTER_ADDRESS, ETH_ALEN, mac);
}
-/*
- * NOTE: This must be executed from our workqueue as it results in udelay
- * being called which may corrupt the keyboard if executed on default
- * workqueue
- */
static void ipw_adapter_restart(void *adapter)
{
struct ipw_priv *priv = adapter;
@@ -2368,13 +2358,13 @@ static void ipw_scan_check(void *data)
IPW_DEBUG_SCAN("Scan completion watchdog resetting "
"adapter after (%dms).\n",
jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG));
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
} else if (priv->status & STATUS_SCANNING) {
IPW_DEBUG_SCAN("Scan completion watchdog aborting scan "
"after (%dms).\n",
jiffies_to_msecs(IPW_SCAN_CHECK_WATCHDOG));
ipw_abort_scan(priv);
- queue_delayed_work(priv->workqueue, &priv->scan_check, HZ);
+ schedule_delayed_work(&priv->scan_check, HZ);
}
}
@@ -3943,7 +3933,7 @@ static void ipw_send_disassociate(struct ipw_priv *priv, int quiet)
if (priv->status & STATUS_ASSOCIATING) {
IPW_DEBUG_ASSOC("Disassociating while associating.\n");
- queue_work(priv->workqueue, &priv->disassociate);
+ schedule_work(&priv->disassociate);
return;
}
@@ -4360,8 +4350,7 @@ static void ipw_gather_stats(struct ipw_priv *priv)
priv->quality = quality;
- queue_delayed_work(priv->workqueue, &priv->gather_stats,
- IPW_STATS_INTERVAL);
+ schedule_delayed_work(&priv->gather_stats, IPW_STATS_INTERVAL);
}
static void ipw_bg_gather_stats(struct work_struct *work)
@@ -4396,10 +4385,10 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF |
IPW_DL_STATE,
"Aborting scan with missed beacon.\n");
- queue_work(priv->workqueue, &priv->abort_scan);
+ schedule_work(&priv->abort_scan);
}
- queue_work(priv->workqueue, &priv->disassociate);
+ schedule_work(&priv->disassociate);
return;
}
@@ -4425,8 +4414,7 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
if (!(priv->status & STATUS_ROAMING)) {
priv->status |= STATUS_ROAMING;
if (!(priv->status & STATUS_SCANNING))
- queue_delayed_work(priv->workqueue,
- &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
}
return;
}
@@ -4439,7 +4427,7 @@ static void ipw_handle_missed_beacon(struct ipw_priv *priv,
* channels..) */
IPW_DEBUG(IPW_DL_INFO | IPW_DL_NOTIF | IPW_DL_STATE,
"Aborting scan with missed beacon.\n");
- queue_work(priv->workqueue, &priv->abort_scan);
+ schedule_work(&priv->abort_scan);
}
IPW_DEBUG_NOTIF("Missed beacon: %d\n", missed_count);
@@ -4462,8 +4450,8 @@ static void handle_scan_event(struct ipw_priv *priv)
/* Only userspace-requested scan completion events go out immediately */
if (!priv->user_requested_scan) {
if (!delayed_work_pending(&priv->scan_event))
- queue_delayed_work(priv->workqueue, &priv->scan_event,
- round_jiffies_relative(msecs_to_jiffies(4000)));
+ schedule_delayed_work(&priv->scan_event,
+ round_jiffies_relative(msecs_to_jiffies(4000)));
} else {
union iwreq_data wrqu;
@@ -4516,20 +4504,17 @@ static void ipw_rx_notification(struct ipw_priv *priv,
IPW_DEBUG_ASSOC
("queueing adhoc check\n");
- queue_delayed_work(priv->
- workqueue,
- &priv->
- adhoc_check,
- le16_to_cpu(priv->
- assoc_request.
- beacon_interval));
+ schedule_delayed_work(
+ &priv->adhoc_check,
+ le16_to_cpu(priv->
+ assoc_request.
+ beacon_interval));
break;
}
priv->status &= ~STATUS_ASSOCIATING;
priv->status |= STATUS_ASSOCIATED;
- queue_work(priv->workqueue,
- &priv->system_config);
+ schedule_work(&priv->system_config);
#ifdef CONFIG_IPW2200_QOS
#define IPW_GET_PACKET_STYPE(x) WLAN_FC_GET_STYPE( \
@@ -4792,43 +4777,37 @@ static void ipw_rx_notification(struct ipw_priv *priv,
#ifdef CONFIG_IPW2200_MONITOR
if (priv->ieee->iw_mode == IW_MODE_MONITOR) {
priv->status |= STATUS_SCAN_FORCED;
- queue_delayed_work(priv->workqueue,
- &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
break;
}
priv->status &= ~STATUS_SCAN_FORCED;
#endif /* CONFIG_IPW2200_MONITOR */
/* Do queued direct scans first */
- if (priv->status & STATUS_DIRECT_SCAN_PENDING) {
- queue_delayed_work(priv->workqueue,
- &priv->request_direct_scan, 0);
- }
+ if (priv->status & STATUS_DIRECT_SCAN_PENDING)
+ schedule_delayed_work(&priv->request_direct_scan, 0);
if (!(priv->status & (STATUS_ASSOCIATED |
STATUS_ASSOCIATING |
STATUS_ROAMING |
STATUS_DISASSOCIATING)))
- queue_work(priv->workqueue, &priv->associate);
+ schedule_work(&priv->associate);
else if (priv->status & STATUS_ROAMING) {
if (x->status == SCAN_COMPLETED_STATUS_COMPLETE)
/* If a scan completed and we are in roam mode, then
* the scan that completed was the one requested as a
* result of entering roam... so, schedule the
* roam work */
- queue_work(priv->workqueue,
- &priv->roam);
+ schedule_work(&priv->roam);
else
/* Don't schedule if we aborted the scan */
priv->status &= ~STATUS_ROAMING;
} else if (priv->status & STATUS_SCAN_PENDING)
- queue_delayed_work(priv->workqueue,
- &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
else if (priv->config & CFG_BACKGROUND_SCAN
&& priv->status & STATUS_ASSOCIATED)
- queue_delayed_work(priv->workqueue,
- &priv->request_scan,
- round_jiffies_relative(HZ));
+ schedule_delayed_work(&priv->request_scan,
+ round_jiffies_relative(HZ));
/* Send an empty event to user space.
* We don't send the received data on the event because
@@ -5192,7 +5171,7 @@ static void ipw_rx_queue_restock(struct ipw_priv *priv)
/* If the pre-allocated buffer pool is dropping low, schedule to
* refill it */
if (rxq->free_count <= RX_LOW_WATERMARK)
- queue_work(priv->workqueue, &priv->rx_replenish);
+ schedule_work(&priv->rx_replenish);
/* If we've added more space for the firmware to place data, tell it */
if (write != rxq->write)
@@ -6133,8 +6112,8 @@ static void ipw_adhoc_check(void *data)
return;
}
- queue_delayed_work(priv->workqueue, &priv->adhoc_check,
- le16_to_cpu(priv->assoc_request.beacon_interval));
+ schedule_delayed_work(&priv->adhoc_check,
+ le16_to_cpu(priv->assoc_request.beacon_interval));
}
static void ipw_bg_adhoc_check(struct work_struct *work)
@@ -6523,8 +6502,7 @@ send_request:
} else
priv->status &= ~STATUS_SCAN_PENDING;
- queue_delayed_work(priv->workqueue, &priv->scan_check,
- IPW_SCAN_CHECK_WATCHDOG);
+ schedule_delayed_work(&priv->scan_check, IPW_SCAN_CHECK_WATCHDOG);
done:
mutex_unlock(&priv->mutex);
return err;
@@ -6994,8 +6972,7 @@ static int ipw_qos_handle_probe_response(struct ipw_priv *priv,
!memcmp(network->ssid,
priv->assoc_network->ssid,
network->ssid_len)) {
- queue_work(priv->workqueue,
- &priv->merge_networks);
+ schedule_work(&priv->merge_networks);
}
}
@@ -7663,7 +7640,7 @@ static int ipw_associate(void *data)
if (priv->status & STATUS_DISASSOCIATING) {
IPW_DEBUG_ASSOC("Not attempting association (in "
"disassociating)\n ");
- queue_work(priv->workqueue, &priv->associate);
+ schedule_work(&priv->associate);
return 0;
}
@@ -7731,12 +7708,10 @@ static int ipw_associate(void *data)
if (!(priv->status & STATUS_SCANNING)) {
if (!(priv->config & CFG_SPEED_SCAN))
- queue_delayed_work(priv->workqueue,
- &priv->request_scan,
- SCAN_INTERVAL);
+ schedule_delayed_work(&priv->request_scan,
+ SCAN_INTERVAL);
else
- queue_delayed_work(priv->workqueue,
- &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
}
return 0;
@@ -8899,7 +8874,7 @@ static int ipw_wx_set_mode(struct net_device *dev,
priv->ieee->iw_mode = wrqu->mode;
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
mutex_unlock(&priv->mutex);
return err;
}
@@ -9598,7 +9573,7 @@ static int ipw_wx_set_scan(struct net_device *dev,
IPW_DEBUG_WX("Start scan\n");
- queue_delayed_work(priv->workqueue, work, 0);
+ schedule_delayed_work(work, 0);
return 0;
}
@@ -9937,7 +9912,7 @@ static int ipw_wx_set_monitor(struct net_device *dev,
#else
priv->net_dev->type = ARPHRD_IEEE80211;
#endif
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
}
ipw_set_channel(priv, parms[1]);
@@ -9947,7 +9922,7 @@ static int ipw_wx_set_monitor(struct net_device *dev,
return 0;
}
priv->net_dev->type = ARPHRD_ETHER;
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
}
mutex_unlock(&priv->mutex);
return 0;
@@ -9961,7 +9936,7 @@ static int ipw_wx_reset(struct net_device *dev,
{
struct ipw_priv *priv = libipw_priv(dev);
IPW_DEBUG_WX("RESET\n");
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
return 0;
}
@@ -10551,7 +10526,7 @@ static int ipw_net_set_mac_address(struct net_device *dev, void *p)
memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN);
printk(KERN_INFO "%s: Setting MAC to %pM\n",
priv->net_dev->name, priv->mac_addr);
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
mutex_unlock(&priv->mutex);
return 0;
}
@@ -10684,9 +10659,7 @@ static void ipw_rf_kill(void *adapter)
if (rf_kill_active(priv)) {
IPW_DEBUG_RF_KILL("RF Kill active, rescheduling GPIO check\n");
- if (priv->workqueue)
- queue_delayed_work(priv->workqueue,
- &priv->rf_kill, 2 * HZ);
+ schedule_delayed_work(&priv->rf_kill, 2 * HZ);
goto exit_unlock;
}
@@ -10697,7 +10670,7 @@ static void ipw_rf_kill(void *adapter)
"device\n");
/* we can not do an adapter restart while inside an irq lock */
- queue_work(priv->workqueue, &priv->adapter_restart);
+ schedule_work(&priv->adapter_restart);
} else
IPW_DEBUG_RF_KILL("HW RF Kill deactivated. SW RF Kill still "
"enabled\n");
@@ -10735,7 +10708,7 @@ static void ipw_link_up(struct ipw_priv *priv)
notify_wx_assoc_event(priv);
if (priv->config & CFG_BACKGROUND_SCAN)
- queue_delayed_work(priv->workqueue, &priv->request_scan, HZ);
+ schedule_delayed_work(&priv->request_scan, HZ);
}
static void ipw_bg_link_up(struct work_struct *work)
@@ -10764,7 +10737,7 @@ static void ipw_link_down(struct ipw_priv *priv)
if (!(priv->status & STATUS_EXIT_PENDING)) {
/* Queue up another scan... */
- queue_delayed_work(priv->workqueue, &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
} else
cancel_delayed_work(&priv->scan_event);
}
@@ -10782,7 +10755,6 @@ static int __devinit ipw_setup_deferred_work(struct ipw_priv *priv)
{
int ret = 0;
- priv->workqueue = create_workqueue(DRV_NAME);
init_waitqueue_head(&priv->wait_command_queue);
init_waitqueue_head(&priv->wait_state);
@@ -11339,8 +11311,7 @@ static int ipw_up(struct ipw_priv *priv)
IPW_WARNING("Radio Frequency Kill Switch is On:\n"
"Kill switch must be turned off for "
"wireless networking to work.\n");
- queue_delayed_work(priv->workqueue, &priv->rf_kill,
- 2 * HZ);
+ schedule_delayed_work(&priv->rf_kill, 2 * HZ);
return 0;
}
@@ -11350,8 +11321,7 @@ static int ipw_up(struct ipw_priv *priv)
/* If configure to try and auto-associate, kick
* off a scan. */
- queue_delayed_work(priv->workqueue,
- &priv->request_scan, 0);
+ schedule_delayed_work(&priv->request_scan, 0);
return 0;
}
@@ -11817,7 +11787,7 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
err = request_irq(pdev->irq, ipw_isr, IRQF_SHARED, DRV_NAME, priv);
if (err) {
IPW_ERROR("Error allocating IRQ %d\n", pdev->irq);
- goto out_destroy_workqueue;
+ goto out_iounmap;
}
SET_NETDEV_DEV(net_dev, &pdev->dev);
@@ -11885,9 +11855,6 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev,
sysfs_remove_group(&pdev->dev.kobj, &ipw_attribute_group);
out_release_irq:
free_irq(pdev->irq, priv);
- out_destroy_workqueue:
- destroy_workqueue(priv->workqueue);
- priv->workqueue = NULL;
out_iounmap:
iounmap(priv->hw_base);
out_pci_release_regions:
@@ -11930,18 +11897,31 @@ static void __devexit ipw_pci_remove(struct pci_dev *pdev)
kfree(priv->cmdlog);
priv->cmdlog = NULL;
}
- /* ipw_down will ensure that there is no more pending work
- * in the workqueue's, so we can safely remove them now. */
- cancel_delayed_work(&priv->adhoc_check);
- cancel_delayed_work(&priv->gather_stats);
- cancel_delayed_work(&priv->request_scan);
- cancel_delayed_work(&priv->request_direct_scan);
- cancel_delayed_work(&priv->request_passive_scan);
- cancel_delayed_work(&priv->scan_event);
- cancel_delayed_work(&priv->rf_kill);
- cancel_delayed_work(&priv->scan_check);
- destroy_workqueue(priv->workqueue);
- priv->workqueue = NULL;
+
+ /* make sure all works are inactive */
+ cancel_delayed_work_sync(&priv->adhoc_check);
+ cancel_work_sync(&priv->associate);
+ cancel_work_sync(&priv->disassociate);
+ cancel_work_sync(&priv->system_config);
+ cancel_work_sync(&priv->rx_replenish);
+ cancel_work_sync(&priv->adapter_restart);
+ cancel_delayed_work_sync(&priv->rf_kill);
+ cancel_work_sync(&priv->up);
+ cancel_work_sync(&priv->down);
+ cancel_delayed_work_sync(&priv->request_scan);
+ cancel_delayed_work_sync(&priv->request_direct_scan);
+ cancel_delayed_work_sync(&priv->request_passive_scan);
+ cancel_delayed_work_sync(&priv->scan_event);
+ cancel_delayed_work_sync(&priv->gather_stats);
+ cancel_work_sync(&priv->abort_scan);
+ cancel_work_sync(&priv->roam);
+ cancel_delayed_work_sync(&priv->scan_check);
+ cancel_work_sync(&priv->link_up);
+ cancel_work_sync(&priv->link_down);
+ cancel_delayed_work_sync(&priv->led_link_on);
+ cancel_delayed_work_sync(&priv->led_link_off);
+ cancel_delayed_work_sync(&priv->led_act_off);
+ cancel_work_sync(&priv->merge_networks);
/* Free MAC hash list for ADHOC */
for (i = 0; i < IPW_IBSS_MAC_HASH_SIZE; i++) {
@@ -12029,7 +12009,7 @@ static int ipw_pci_resume(struct pci_dev *pdev)
priv->suspend_time = get_seconds() - priv->suspend_at;
/* Bring the device back up */
- queue_work(priv->workqueue, &priv->up);
+ schedule_work(&priv->up);
return 0;
}
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.h b/drivers/net/wireless/ipw2x00/ipw2200.h
index d7d049c7a4fa..0441445b8bfa 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.h
+++ b/drivers/net/wireless/ipw2x00/ipw2200.h
@@ -1299,8 +1299,6 @@ struct ipw_priv {
u8 direct_scan_ssid[IW_ESSID_MAX_SIZE];
u8 direct_scan_ssid_len;
- struct workqueue_struct *workqueue;
-
struct delayed_work adhoc_check;
struct work_struct associate;
struct work_struct disassociate;
diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c
index a9b852be4509..39b6f16c87fa 100644
--- a/drivers/net/wireless/iwlwifi/iwl-3945.c
+++ b/drivers/net/wireless/iwlwifi/iwl-3945.c
@@ -402,72 +402,6 @@ static void iwl3945_accumulative_statistics(struct iwl_priv *priv,
}
#endif
-/**
- * iwl3945_good_plcp_health - checks for plcp error.
- *
- * When the plcp error is exceeding the thresholds, reset the radio
- * to improve the throughput.
- */
-static bool iwl3945_good_plcp_health(struct iwl_priv *priv,
- struct iwl_rx_packet *pkt)
-{
- bool rc = true;
- struct iwl3945_notif_statistics current_stat;
- int combined_plcp_delta;
- unsigned int plcp_msec;
- unsigned long plcp_received_jiffies;
-
- if (priv->cfg->base_params->plcp_delta_threshold ==
- IWL_MAX_PLCP_ERR_THRESHOLD_DISABLE) {
- IWL_DEBUG_RADIO(priv, "plcp_err check disabled\n");
- return rc;
- }
- memcpy(&current_stat, pkt->u.raw, sizeof(struct
- iwl3945_notif_statistics));
- /*
- * check for plcp_err and trigger radio reset if it exceeds
- * the plcp error threshold plcp_delta.
- */
- plcp_received_jiffies = jiffies;
- plcp_msec = jiffies_to_msecs((long) plcp_received_jiffies -
- (long) priv->plcp_jiffies);
- priv->plcp_jiffies = plcp_received_jiffies;
- /*
- * check to make sure plcp_msec is not 0 to prevent division
- * by zero.
- */
- if (plcp_msec) {
- combined_plcp_delta =
- (le32_to_cpu(current_stat.rx.ofdm.plcp_err) -
- le32_to_cpu(priv->_3945.statistics.rx.ofdm.plcp_err));
-
- if ((combined_plcp_delta > 0) &&
- ((combined_plcp_delta * 100) / plcp_msec) >
- priv->cfg->base_params->plcp_delta_threshold) {
- /*
- * if plcp_err exceed the threshold, the following
- * data is printed in csv format:
- * Text: plcp_err exceeded %d,
- * Received ofdm.plcp_err,
- * Current ofdm.plcp_err,
- * combined_plcp_delta,
- * plcp_msec
- */
- IWL_DEBUG_RADIO(priv, "plcp_err exceeded %u, "
- "%u, %d, %u mSecs\n",
- priv->cfg->base_params->plcp_delta_threshold,
- le32_to_cpu(current_stat.rx.ofdm.plcp_err),
- combined_plcp_delta, plcp_msec);
- /*
- * Reset the RF radio due to the high plcp
- * error rate
- */
- rc = false;
- }
- }
- return rc;
-}
-
void iwl3945_hw_rx_statistics(struct iwl_priv *priv,
struct iwl_rx_mem_buffer *rxb)
{
@@ -2734,7 +2668,6 @@ static struct iwl_lib_ops iwl3945_lib = {
.isr_ops = {
.isr = iwl_isr_legacy,
},
- .check_plcp_health = iwl3945_good_plcp_health,
.debugfs_ops = {
.rx_stats_read = iwl3945_ucode_rx_stats_read,
diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c
index 79ab0a6b1386..537fb8c84e3a 100644
--- a/drivers/net/wireless/iwlwifi/iwl-5000.c
+++ b/drivers/net/wireless/iwlwifi/iwl-5000.c
@@ -51,7 +51,7 @@
#include "iwl-agn-debugfs.h"
/* Highest firmware API version supported */
-#define IWL5000_UCODE_API_MAX 2
+#define IWL5000_UCODE_API_MAX 5
#define IWL5150_UCODE_API_MAX 2
/* Lowest firmware API version supported */
diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index 1eacba4daa5b..0494d7b102d4 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -199,6 +199,7 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
while (i != idx) {
u16 len;
struct sk_buff *skb;
+ dma_addr_t dma_addr;
desc = &ring[i];
len = le16_to_cpu(desc->len);
skb = rx_buf[i];
@@ -216,17 +217,20 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
len = priv->common.rx_mtu;
}
+ dma_addr = le32_to_cpu(desc->host_addr);
+ pci_dma_sync_single_for_cpu(priv->pdev, dma_addr,
+ priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
skb_put(skb, len);
if (p54_rx(dev, skb)) {
- pci_unmap_single(priv->pdev,
- le32_to_cpu(desc->host_addr),
- priv->common.rx_mtu + 32,
- PCI_DMA_FROMDEVICE);
+ pci_unmap_single(priv->pdev, dma_addr,
+ priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
rx_buf[i] = NULL;
- desc->host_addr = 0;
+ desc->host_addr = cpu_to_le32(0);
} else {
skb_trim(skb, 0);
+ pci_dma_sync_single_for_device(priv->pdev, dma_addr,
+ priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
}
diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c
index 21713a7638c4..9b344a921e74 100644
--- a/drivers/net/wireless/p54/p54usb.c
+++ b/drivers/net/wireless/p54/p54usb.c
@@ -98,6 +98,7 @@ static struct usb_device_id p54u_table[] __devinitdata = {
{USB_DEVICE(0x1413, 0x5400)}, /* Telsey 802.11g USB2.0 Adapter */
{USB_DEVICE(0x1435, 0x0427)}, /* Inventel UR054G */
{USB_DEVICE(0x1668, 0x1050)}, /* Actiontec 802UIG-1 */
+ {USB_DEVICE(0x1740, 0x1000)}, /* Senao NUB-350 */
{USB_DEVICE(0x2001, 0x3704)}, /* DLink DWL-G122 rev A2 */
{USB_DEVICE(0x2001, 0x3705)}, /* D-Link DWL-G120 rev C1 */
{USB_DEVICE(0x413c, 0x5513)}, /* Dell WLA3310 USB Wireless Adapter */
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 848cc2cce247..518542b4bf9e 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -2597,6 +2597,9 @@ static int rndis_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
__le32 mode;
int ret;
+ if (priv->device_type != RNDIS_BCM4320B)
+ return -ENOTSUPP;
+
netdev_dbg(usbdev->net, "%s(): %s, %d\n", __func__,
enabled ? "enabled" : "disabled",
timeout);
diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c
index aa97971a38af..3b3f1e45ab3e 100644
--- a/drivers/net/wireless/rt2x00/rt2800pci.c
+++ b/drivers/net/wireless/rt2x00/rt2800pci.c
@@ -652,6 +652,12 @@ static void rt2800pci_fill_rxdone(struct queue_entry *entry,
*/
rxdesc->flags |= RX_FLAG_IV_STRIPPED;
+ /*
+ * The hardware has already checked the Michael MIC and has
+ * stripped it from the frame. Signal this to mac80211.
+ */
+ rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
+
if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
rxdesc->flags |= RX_FLAG_DECRYPTED;
else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
@@ -1065,6 +1071,8 @@ static DEFINE_PCI_DEVICE_TABLE(rt2800pci_device_table) = {
{ PCI_DEVICE(0x1814, 0x3390), PCI_DEVICE_DATA(&rt2800pci_ops) },
#endif
#ifdef CONFIG_RT2800PCI_RT35XX
+ { PCI_DEVICE(0x1432, 0x7711), PCI_DEVICE_DATA(&rt2800pci_ops) },
+ { PCI_DEVICE(0x1432, 0x7722), PCI_DEVICE_DATA(&rt2800pci_ops) },
{ PCI_DEVICE(0x1814, 0x3060), PCI_DEVICE_DATA(&rt2800pci_ops) },
{ PCI_DEVICE(0x1814, 0x3062), PCI_DEVICE_DATA(&rt2800pci_ops) },
{ PCI_DEVICE(0x1814, 0x3562), PCI_DEVICE_DATA(&rt2800pci_ops) },
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index b97a4a54ff4c..197a36c05fda 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -486,6 +486,12 @@ static void rt2800usb_fill_rxdone(struct queue_entry *entry,
*/
rxdesc->flags |= RX_FLAG_IV_STRIPPED;
+ /*
+ * The hardware has already checked the Michael MIC and has
+ * stripped it from the frame. Signal this to mac80211.
+ */
+ rxdesc->flags |= RX_FLAG_MMIC_STRIPPED;
+
if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS)
rxdesc->flags |= RX_FLAG_DECRYPTED;
else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC)
diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index ffedfd492754..ea1580085347 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -3,7 +3,7 @@
#
menuconfig NFC_DEVICES
- bool "NFC devices"
+ bool "Near Field Communication (NFC) devices"
default n
---help---
You'll have to say Y if your computer contains an NFC device that
diff --git a/drivers/nfc/pn544.c b/drivers/nfc/pn544.c
index bae647264dd6..724f65d8f9e4 100644
--- a/drivers/nfc/pn544.c
+++ b/drivers/nfc/pn544.c
@@ -60,7 +60,7 @@ enum pn544_irq {
struct pn544_info {
struct miscdevice miscdev;
struct i2c_client *i2c_dev;
- struct regulator_bulk_data regs[2];
+ struct regulator_bulk_data regs[3];
enum pn544_state state;
wait_queue_head_t read_wait;
@@ -74,6 +74,7 @@ struct pn544_info {
static const char reg_vdd_io[] = "Vdd_IO";
static const char reg_vbat[] = "VBat";
+static const char reg_vsim[] = "VSim";
/* sysfs interface */
static ssize_t pn544_test(struct device *dev,
@@ -740,6 +741,7 @@ static int __devinit pn544_probe(struct i2c_client *client,
info->regs[0].supply = reg_vdd_io;
info->regs[1].supply = reg_vbat;
+ info->regs[2].supply = reg_vsim;
r = regulator_bulk_get(&client->dev, ARRAY_SIZE(info->regs),
info->regs);
if (r < 0)
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3c6e100a3ad0..d06a6374ed6c 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -69,4 +69,10 @@ config OF_MDIO
help
OpenFirmware MDIO bus (Ethernet PHY) accessors
+config OF_PCI
+ def_tristate PCI
+ depends on PCI && (PPC || MICROBLAZE || X86)
+ help
+ OpenFirmware PCI bus accessors
+
endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 3ab21a0a4907..f7861ed2f287 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_OF_I2C) += of_i2c.o
obj-$(CONFIG_OF_NET) += of_net.o
obj-$(CONFIG_OF_SPI) += of_spi.o
obj-$(CONFIG_OF_MDIO) += of_mdio.o
+obj-$(CONFIG_OF_PCI) += of_pci.o
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
new file mode 100644
index 000000000000..ac1ec54e4fd5
--- /dev/null
+++ b/drivers/of/of_pci.c
@@ -0,0 +1,92 @@
+#include <linux/kernel.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+#include <asm/prom.h>
+
+/**
+ * of_irq_map_pci - Resolve the interrupt for a PCI device
+ * @pdev: the device whose interrupt is to be resolved
+ * @out_irq: structure of_irq filled by this function
+ *
+ * This function resolves the PCI interrupt for a given PCI device. If a
+ * device-node exists for a given pci_dev, it will use normal OF tree
+ * walking. If not, it will implement standard swizzling and walk up the
+ * PCI tree until a device-node is found, at which point it will finish
+ * resolving using the OF tree walking.
+ */
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
+{
+ struct device_node *dn, *ppnode;
+ struct pci_dev *ppdev;
+ u32 lspec;
+ __be32 lspec_be;
+ __be32 laddr[3];
+ u8 pin;
+ int rc;
+
+ /* Check if we have a device node; if yes, fall back to standard
+ * device tree parsing
+ */
+ dn = pci_device_to_OF_node(pdev);
+ if (dn) {
+ rc = of_irq_map_one(dn, 0, out_irq);
+ if (!rc)
+ return rc;
+ }
+
+ /* Ok, we don't (or the lookup failed), time to have fun. Let's start by building up an
+ * interrupt spec. We assume #interrupt-cells is 1, which is standard
+ * for PCI. If you do differently, then don't use this routine.
+ */
+ rc = pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, &pin);
+ if (rc != 0)
+ return rc;
+ /* No pin, exit */
+ if (pin == 0)
+ return -ENODEV;
+
+ /* Now we walk up the PCI tree */
+ lspec = pin;
+ for (;;) {
+ /* Get the pci_dev of our parent */
+ ppdev = pdev->bus->self;
+
+ /* Ouch, it's a host bridge... */
+ if (ppdev == NULL) {
+ ppnode = pci_bus_to_OF_node(pdev->bus);
+
+ /* No node for host bridge ? give up */
+ if (ppnode == NULL)
+ return -EINVAL;
+ } else {
+ /* We found a P2P bridge, check if it has a node */
+ ppnode = pci_device_to_OF_node(ppdev);
+ }
+
+ /* Ok, we have found a parent with a device-node, hand over to
+ * the OF parsing code.
+ * We build a unit address from the linux device to be used for
+ * resolution. Note that we use the linux bus number which may
+ * not match your firmware bus numbering.
+ * Fortunately, in most cases, interrupt-map-mask doesn't
+ * include the bus number as part of the matching.
+ * You should still be careful about that though if you intend
+ * to rely on this function (you ship a firmware that doesn't
+ * create device nodes for all PCI devices).
+ */
+ if (ppnode)
+ break;
+
+ /* We can only get here if we hit a P2P bridge with no node,
+ * let's do standard swizzling and try again
+ */
+ lspec = pci_swizzle_interrupt_pin(pdev, lspec);
+ pdev = ppdev;
+ }
+
+ lspec_be = cpu_to_be32(lspec);
+ laddr[0] = cpu_to_be32((pdev->bus->number << 16) | (pdev->devfn << 8));
+ laddr[1] = laddr[2] = cpu_to_be32(0);
+ return of_irq_map_raw(ppnode, &lspec_be, 1, laddr, out_irq);
+}
+EXPORT_SYMBOL_GPL(of_irq_map_pci);
diff --git a/drivers/of/pdt.c b/drivers/of/pdt.c
index 28295d0a50f6..4d87b5dc9284 100644
--- a/drivers/of/pdt.c
+++ b/drivers/of/pdt.c
@@ -36,19 +36,55 @@ unsigned int of_pdt_unique_id __initdata;
(p)->unique_id = of_pdt_unique_id++; \
} while (0)
-static inline const char *of_pdt_node_name(struct device_node *dp)
+static char * __init of_pdt_build_full_name(struct device_node *dp)
{
- return dp->path_component_name;
+ int len, ourlen, plen;
+ char *n;
+
+ dp->path_component_name = build_path_component(dp);
+
+ plen = strlen(dp->parent->full_name);
+ ourlen = strlen(dp->path_component_name);
+ len = ourlen + plen + 2;
+
+ n = prom_early_alloc(len);
+ strcpy(n, dp->parent->full_name);
+ if (!of_node_is_root(dp->parent)) {
+ strcpy(n + plen, "/");
+ plen++;
+ }
+ strcpy(n + plen, dp->path_component_name);
+
+ return n;
}
-#else
+#else /* CONFIG_SPARC */
static inline void of_pdt_incr_unique_id(void *p) { }
static inline void irq_trans_init(struct device_node *dp) { }
-static inline const char *of_pdt_node_name(struct device_node *dp)
+static char * __init of_pdt_build_full_name(struct device_node *dp)
{
- return dp->name;
+ static int failsafe_id = 0; /* for generating unique names on failure */
+ char *buf;
+ int len;
+
+ if (of_pdt_prom_ops->pkg2path(dp->phandle, NULL, 0, &len))
+ goto failsafe;
+
+ buf = prom_early_alloc(len + 1);
+ if (of_pdt_prom_ops->pkg2path(dp->phandle, buf, len, &len))
+ goto failsafe;
+ return buf;
+
+ failsafe:
+ buf = prom_early_alloc(strlen(dp->parent->full_name) +
+ strlen(dp->name) + 16);
+ sprintf(buf, "%s/%s@unknown%i",
+ of_node_is_root(dp->parent) ? "" : dp->parent->full_name,
+ dp->name, failsafe_id++);
+ pr_err("%s: pkg2path failed; assigning %s\n", __func__, buf);
+ return buf;
}
#endif /* !CONFIG_SPARC */
@@ -132,47 +168,6 @@ static char * __init of_pdt_get_one_property(phandle node, const char *name)
return buf;
}
-static char * __init of_pdt_try_pkg2path(phandle node)
-{
- char *res, *buf = NULL;
- int len;
-
- if (!of_pdt_prom_ops->pkg2path)
- return NULL;
-
- if (of_pdt_prom_ops->pkg2path(node, buf, 0, &len))
- return NULL;
- buf = prom_early_alloc(len + 1);
- if (of_pdt_prom_ops->pkg2path(node, buf, len, &len)) {
- pr_err("%s: package-to-path failed\n", __func__);
- return NULL;
- }
-
- res = strrchr(buf, '/');
- if (!res) {
- pr_err("%s: couldn't find / in %s\n", __func__, buf);
- return NULL;
- }
- return res+1;
-}
-
-/*
- * When fetching the node's name, first try using package-to-path; if
- * that fails (either because the arch hasn't supplied a PROM callback,
- * or some other random failure), fall back to just looking at the node's
- * 'name' property.
- */
-static char * __init of_pdt_build_name(phandle node)
-{
- char *buf;
-
- buf = of_pdt_try_pkg2path(node);
- if (!buf)
- buf = of_pdt_get_one_property(node, "name");
-
- return buf;
-}
-
static struct device_node * __init of_pdt_create_node(phandle node,
struct device_node *parent)
{
@@ -187,7 +182,7 @@ static struct device_node * __init of_pdt_create_node(phandle node,
kref_init(&dp->kref);
- dp->name = of_pdt_build_name(node);
+ dp->name = of_pdt_get_one_property(node, "name");
dp->type = of_pdt_get_one_property(node, "device_type");
dp->phandle = node;
@@ -198,26 +193,6 @@ static struct device_node * __init of_pdt_create_node(phandle node,
return dp;
}
-static char * __init of_pdt_build_full_name(struct device_node *dp)
-{
- int len, ourlen, plen;
- char *n;
-
- plen = strlen(dp->parent->full_name);
- ourlen = strlen(of_pdt_node_name(dp));
- len = ourlen + plen + 2;
-
- n = prom_early_alloc(len);
- strcpy(n, dp->parent->full_name);
- if (!of_node_is_root(dp->parent)) {
- strcpy(n + plen, "/");
- plen++;
- }
- strcpy(n + plen, of_pdt_node_name(dp));
-
- return n;
-}
-
static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
phandle node,
struct device_node ***nextp)
@@ -240,9 +215,6 @@ static struct device_node * __init of_pdt_build_tree(struct device_node *parent,
*(*nextp) = dp;
*nextp = &dp->allnext;
-#if defined(CONFIG_SPARC)
- dp->path_component_name = build_path_component(dp);
-#endif
dp->full_name = of_pdt_build_full_name(dp);
dp->child = of_pdt_build_tree(dp,
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 88246dd46452..d86ea8b01137 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -431,7 +431,7 @@ static void pci_device_shutdown(struct device *dev)
pci_msix_shutdown(pci_dev);
}
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
/* Auxiliary functions used for system resume and run-time resume. */
@@ -1059,7 +1059,7 @@ static int pci_pm_runtime_idle(struct device *dev)
#endif /* !CONFIG_PM_RUNTIME */
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
const struct dev_pm_ops pci_dev_pm_ops = {
.prepare = pci_pm_prepare,
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 3a5a6fcc0ead..492b7d807fe8 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -243,7 +243,7 @@ struct pci_ops pcifront_bus_ops = {
#ifdef CONFIG_PCI_MSI
static int pci_frontend_enable_msix(struct pci_dev *dev,
- int **vector, int nvec)
+ int vector[], int nvec)
{
int err;
int i;
@@ -277,18 +277,24 @@ static int pci_frontend_enable_msix(struct pci_dev *dev,
if (likely(!err)) {
if (likely(!op.value)) {
/* we get the result */
- for (i = 0; i < nvec; i++)
- *(*vector+i) = op.msix_entries[i].vector;
- return 0;
+ for (i = 0; i < nvec; i++) {
+ if (op.msix_entries[i].vector <= 0) {
+ dev_warn(&dev->dev, "MSI-X entry %d is invalid: %d!\n",
+ i, op.msix_entries[i].vector);
+ err = -EINVAL;
+ vector[i] = -1;
+ continue;
+ }
+ vector[i] = op.msix_entries[i].vector;
+ }
} else {
printk(KERN_DEBUG "enable msix get value %x\n",
op.value);
- return op.value;
}
} else {
dev_err(&dev->dev, "enable msix get err %x\n", err);
- return err;
}
+ return err;
}
static void pci_frontend_disable_msix(struct pci_dev *dev)
@@ -310,7 +316,7 @@ static void pci_frontend_disable_msix(struct pci_dev *dev)
dev_err(&dev->dev, "pci_disable_msix get err %x\n", err);
}
-static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
+static int pci_frontend_enable_msi(struct pci_dev *dev, int vector[])
{
int err;
struct xen_pci_op op = {
@@ -324,7 +330,13 @@ static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector)
err = do_pci_op(pdev, &op);
if (likely(!err)) {
- *(*vector) = op.value;
+ vector[0] = op.value;
+ if (op.value <= 0) {
+ dev_warn(&dev->dev, "MSI entry is invalid: %d!\n",
+ op.value);
+ err = -EINVAL;
+ vector[0] = -1;
+ }
} else {
dev_err(&dev->dev, "pci frontend enable msi failed for dev "
"%x:%x\n", op.bus, op.devfn);
@@ -733,8 +745,7 @@ static void free_pdev(struct pcifront_device *pdev)
pcifront_free_roots(pdev);
- /*For PCIE_AER error handling job*/
- flush_scheduled_work();
+ cancel_work_sync(&pdev->op_work);
if (pdev->irq >= 0)
unbind_from_irqhandler(pdev->irq, pdev);
diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c
index 0bdda5b3ed55..42fbf1a75576 100644
--- a/drivers/pcmcia/pcmcia_resource.c
+++ b/drivers/pcmcia/pcmcia_resource.c
@@ -518,6 +518,8 @@ int pcmcia_enable_device(struct pcmcia_device *p_dev)
flags |= CONF_ENABLE_IOCARD;
if (flags & CONF_ENABLE_IOCARD)
s->socket.flags |= SS_IOCARD;
+ if (flags & CONF_ENABLE_ZVCARD)
+ s->socket.flags |= SS_ZVCARD | SS_IOCARD;
if (flags & CONF_ENABLE_SPKR) {
s->socket.flags |= SS_SPKR_ENA;
status = CCSR_AUDIO_ENA;
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index 3755e7c8c715..2c540542b5af 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -215,7 +215,7 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt,
}
#endif
-static void pxa2xx_configure_sockets(struct device *dev)
+void pxa2xx_configure_sockets(struct device *dev)
{
struct pcmcia_low_level *ops = dev->platform_data;
/*
diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h
index bb62ea87b8f9..b609b45469ed 100644
--- a/drivers/pcmcia/pxa2xx_base.h
+++ b/drivers/pcmcia/pxa2xx_base.h
@@ -1,3 +1,4 @@
int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt);
void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops);
+void pxa2xx_configure_sockets(struct device *dev);
diff --git a/drivers/pcmcia/pxa2xx_colibri.c b/drivers/pcmcia/pxa2xx_colibri.c
index c3f72192af66..a52039564e74 100644
--- a/drivers/pcmcia/pxa2xx_colibri.c
+++ b/drivers/pcmcia/pxa2xx_colibri.c
@@ -181,6 +181,9 @@ static int __init colibri_pcmcia_init(void)
{
int ret;
+ if (!machine_is_colibri() && !machine_is_colibri320())
+ return -ENODEV;
+
colibri_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
if (!colibri_pcmcia_device)
return -ENOMEM;
diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c
index b9f8c8fb42bd..25afe637c657 100644
--- a/drivers/pcmcia/pxa2xx_lubbock.c
+++ b/drivers/pcmcia/pxa2xx_lubbock.c
@@ -226,6 +226,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev)
lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
+ pxa2xx_configure_sockets(&sadev->dev);
ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
pxa2xx_drv_pcmcia_add_one);
}
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index d163bc2e2b9e..a59af5b24f0a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -227,7 +227,7 @@ config SONYPI_COMPAT
config IDEAPAD_LAPTOP
tristate "Lenovo IdeaPad Laptop Extras"
depends on ACPI
- depends on RFKILL
+ depends on RFKILL && INPUT
select INPUT_SPARSEKMAP
help
This is a driver for the rfkill switches on Lenovo IdeaPad netbooks.
diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c
index c5c4b8c32eb8..38b34a73866a 100644
--- a/drivers/platform/x86/acer-wmi.c
+++ b/drivers/platform/x86/acer-wmi.c
@@ -84,7 +84,7 @@ MODULE_LICENSE("GPL");
*/
#define AMW0_GUID1 "67C3371D-95A3-4C37-BB61-DD47B491DAAB"
#define AMW0_GUID2 "431F16ED-0C2B-444C-B267-27DEB140CF9C"
-#define WMID_GUID1 "6AF4F258-B401-42fd-BE91-3D4AC2D7C0D3"
+#define WMID_GUID1 "6AF4F258-B401-42FD-BE91-3D4AC2D7C0D3"
#define WMID_GUID2 "95764E09-FB56-4e83-B31A-37761F60994A"
#define WMID_GUID3 "61EF69EA-865C-4BC3-A502-A0DEBA0CB531"
@@ -1280,7 +1280,7 @@ static ssize_t set_bool_threeg(struct device *dev,
return -EINVAL;
return count;
}
-static DEVICE_ATTR(threeg, S_IWUGO | S_IRUGO | S_IWUSR, show_bool_threeg,
+static DEVICE_ATTR(threeg, S_IRUGO | S_IWUSR, show_bool_threeg,
set_bool_threeg);
static ssize_t show_interface(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index 4633fd8532cc..fe495939c307 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -1081,14 +1081,8 @@ static int asus_hotk_add_fs(struct acpi_device *device)
struct proc_dir_entry *proc;
mode_t mode;
- /*
- * If parameter uid or gid is not changed, keep the default setting for
- * our proc entries (-rw-rw-rw-) else, it means we care about security,
- * and then set to -rw-rw----
- */
-
if ((asus_uid == 0) && (asus_gid == 0)) {
- mode = S_IFREG | S_IRUGO | S_IWUGO;
+ mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP;
} else {
mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP;
printk(KERN_WARNING " asus_uid and asus_gid parameters are "
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index 34657f96b5a5..ad24ef36f9f7 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -290,9 +290,12 @@ static int dell_rfkill_set(void *data, bool blocked)
dell_send_request(buffer, 17, 11);
/* If the hardware switch controls this radio, and the hardware
- switch is disabled, don't allow changing the software state */
+ switch is disabled, don't allow changing the software state.
+ If the hardware switch is reported as not supported, always
+ fire the SMI to toggle the killswitch. */
if ((hwswitch_state & BIT(hwswitch_bit)) &&
- !(buffer->output[1] & BIT(16))) {
+ !(buffer->output[1] & BIT(16)) &&
+ (buffer->output[1] & BIT(0))) {
ret = -EINVAL;
goto out;
}
@@ -398,6 +401,23 @@ static const struct file_operations dell_debugfs_fops = {
static void dell_update_rfkill(struct work_struct *ignored)
{
+ int status;
+
+ get_buffer();
+ dell_send_request(buffer, 17, 11);
+ status = buffer->output[1];
+ release_buffer();
+
+ /* if hardware rfkill is not supported, set it explicitly */
+ if (!(status & BIT(0))) {
+ if (wifi_rfkill)
+ dell_rfkill_set((void *)1, !((status & BIT(17)) >> 17));
+ if (bluetooth_rfkill)
+ dell_rfkill_set((void *)2, !((status & BIT(18)) >> 18));
+ if (wwan_rfkill)
+ dell_rfkill_set((void *)3, !((status & BIT(19)) >> 19));
+ }
+
if (wifi_rfkill)
dell_rfkill_query(wifi_rfkill, (void *)1);
if (bluetooth_rfkill)
diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index 930e62762365..61433d492862 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c
@@ -60,69 +60,20 @@ enum pmic_gpio_register {
#define GPOSW_DOU 0x08
#define GPOSW_RDRV 0x30
+#define GPIO_UPDATE_TYPE 0x80000000
#define NUM_GPIO 24
-struct pmic_gpio_irq {
- spinlock_t lock;
- u32 trigger[NUM_GPIO];
- u32 dirty;
- struct work_struct work;
-};
-
-
struct pmic_gpio {
+ struct mutex buslock;
struct gpio_chip chip;
- struct pmic_gpio_irq irqtypes;
void *gpiointr;
int irq;
unsigned irq_base;
+ unsigned int update_type;
+ u32 trigger_type;
};
-static void pmic_program_irqtype(int gpio, int type)
-{
- if (type & IRQ_TYPE_EDGE_RISING)
- intel_scu_ipc_update_register(GPIO0 + gpio, 0x20, 0x20);
- else
- intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x20);
-
- if (type & IRQ_TYPE_EDGE_FALLING)
- intel_scu_ipc_update_register(GPIO0 + gpio, 0x10, 0x10);
- else
- intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x10);
-};
-
-static void pmic_irqtype_work(struct work_struct *work)
-{
- struct pmic_gpio_irq *t =
- container_of(work, struct pmic_gpio_irq, work);
- unsigned long flags;
- int i;
- u16 type;
-
- spin_lock_irqsave(&t->lock, flags);
- /* As we drop the lock, we may need multiple scans if we race the
- pmic_irq_type function */
- while (t->dirty) {
- /*
- * For each pin that has the dirty bit set send an IPC
- * message to configure the hardware via the PMIC
- */
- for (i = 0; i < NUM_GPIO; i++) {
- if (!(t->dirty & (1 << i)))
- continue;
- t->dirty &= ~(1 << i);
- /* We can't trust the array entry or dirty
- once the lock is dropped */
- type = t->trigger[i];
- spin_unlock_irqrestore(&t->lock, flags);
- pmic_program_irqtype(i, type);
- spin_lock_irqsave(&t->lock, flags);
- }
- }
- spin_unlock_irqrestore(&t->lock, flags);
-}
-
static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
{
if (offset > 8) {
@@ -190,25 +141,24 @@ static void pmic_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
1 << (offset - 16));
}
-static int pmic_irq_type(unsigned irq, unsigned type)
+/*
+ * This is called from genirq with pg->buslock locked and
+ * irq_desc->lock held. We can not access the scu bus here, so we
+ * store the change and update in the bus_sync_unlock() function below
+ */
+static int pmic_irq_type(struct irq_data *data, unsigned type)
{
- struct pmic_gpio *pg = get_irq_chip_data(irq);
- u32 gpio = irq - pg->irq_base;
- unsigned long flags;
+ struct pmic_gpio *pg = irq_data_get_irq_chip_data(data);
+ u32 gpio = data->irq - pg->irq_base;
if (gpio >= pg->chip.ngpio)
return -EINVAL;
- spin_lock_irqsave(&pg->irqtypes.lock, flags);
- pg->irqtypes.trigger[gpio] = type;
- pg->irqtypes.dirty |= (1 << gpio);
- spin_unlock_irqrestore(&pg->irqtypes.lock, flags);
- schedule_work(&pg->irqtypes.work);
+ pg->trigger_type = type;
+ pg->update_type = gpio | GPIO_UPDATE_TYPE;
return 0;
}
-
-
static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
{
struct pmic_gpio *pg = container_of(chip, struct pmic_gpio, chip);
@@ -217,38 +167,32 @@ static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset)
}
/* the gpiointr register is read-clear, so just do nothing. */
-static void pmic_irq_unmask(unsigned irq)
-{
-};
+static void pmic_irq_unmask(struct irq_data *data) { }
-static void pmic_irq_mask(unsigned irq)
-{
-};
+static void pmic_irq_mask(struct irq_data *data) { }
static struct irq_chip pmic_irqchip = {
.name = "PMIC-GPIO",
- .mask = pmic_irq_mask,
- .unmask = pmic_irq_unmask,
- .set_type = pmic_irq_type,
+ .irq_mask = pmic_irq_mask,
+ .irq_unmask = pmic_irq_unmask,
+ .irq_set_type = pmic_irq_type,
};
-static void pmic_irq_handler(unsigned irq, struct irq_desc *desc)
+static irqreturn_t pmic_irq_handler(int irq, void *data)
{
- struct pmic_gpio *pg = (struct pmic_gpio *)get_irq_data(irq);
+ struct pmic_gpio *pg = data;
u8 intsts = *((u8 *)pg->gpiointr + 4);
int gpio;
+ irqreturn_t ret = IRQ_NONE;
for (gpio = 0; gpio < 8; gpio++) {
if (intsts & (1 << gpio)) {
pr_debug("pmic pin %d triggered\n", gpio);
generic_handle_irq(pg->irq_base + gpio);
+ ret = IRQ_HANDLED;
}
}
-
- if (desc->chip->irq_eoi)
- desc->chip->irq_eoi(irq_get_irq_data(irq));
- else
- dev_warn(pg->chip.dev, "missing EOI handler for irq %d\n", irq);
+ return ret;
}
static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
@@ -297,8 +241,7 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
pg->chip.can_sleep = 1;
pg->chip.dev = dev;
- INIT_WORK(&pg->irqtypes.work, pmic_irqtype_work);
- spin_lock_init(&pg->irqtypes.lock);
+ mutex_init(&pg->buslock);
pg->chip.dev = dev;
retval = gpiochip_add(&pg->chip);
@@ -306,8 +249,13 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev)
printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__);
goto err;
}
- set_irq_data(pg->irq, pg);
- set_irq_chained_handler(pg->irq, pmic_irq_handler);
+
+ retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg);
+ if (retval) {
+ printk(KERN_WARNING "pmic: Interrupt request failed\n");
+ goto err;
+ }
+
for (i = 0; i < 8; i++) {
set_irq_chip_and_handler_name(i + pg->irq_base, &pmic_irqchip,
handle_simple_irq, "demux");
diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c
index 1fe0f1feff71..865ef78d6f1a 100644
--- a/drivers/platform/x86/tc1100-wmi.c
+++ b/drivers/platform/x86/tc1100-wmi.c
@@ -162,7 +162,7 @@ set_bool_##value(struct device *dev, struct device_attribute *attr, \
return -EINVAL; \
return count; \
} \
-static DEVICE_ATTR(value, S_IWUGO | S_IRUGO | S_IWUSR, \
+static DEVICE_ATTR(value, S_IRUGO | S_IWUSR, \
show_bool_##value, set_bool_##value);
show_set_bool(wireless, TC1100_INSTANCE_WIRELESS);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index dd599585c6a9..eb9922385ef8 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2275,16 +2275,12 @@ static void tpacpi_input_send_key(const unsigned int scancode)
if (keycode != KEY_RESERVED) {
mutex_lock(&tpacpi_inputdev_send_mutex);
+ input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN, scancode);
input_report_key(tpacpi_inputdev, keycode, 1);
- if (keycode == KEY_UNKNOWN)
- input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
- scancode);
input_sync(tpacpi_inputdev);
+ input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN, scancode);
input_report_key(tpacpi_inputdev, keycode, 0);
- if (keycode == KEY_UNKNOWN)
- input_event(tpacpi_inputdev, EV_MSC, MSC_SCAN,
- scancode);
input_sync(tpacpi_inputdev);
mutex_unlock(&tpacpi_inputdev_send_mutex);
diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig
index f3a73dd77660..e4c4f3dc0728 100644
--- a/drivers/pps/generators/Kconfig
+++ b/drivers/pps/generators/Kconfig
@@ -6,7 +6,7 @@ comment "PPS generators support"
config PPS_GENERATOR_PARPORT
tristate "Parallel port PPS signal generator"
- depends on PARPORT
+ depends on PARPORT && BROKEN
help
If you say yes here you get support for a PPS signal generator which
utilizes STROBE pin of a parallel port to send PPS signals. It uses
diff --git a/drivers/pps/kapi.c b/drivers/pps/kapi.c
index cba1b43f7519..a4e8eb9fece6 100644
--- a/drivers/pps/kapi.c
+++ b/drivers/pps/kapi.c
@@ -168,7 +168,7 @@ void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event,
{
unsigned long flags;
int captured = 0;
- struct pps_ktime ts_real;
+ struct pps_ktime ts_real = { .sec = 0, .nsec = 0, .flags = 0 };
/* check event type */
BUG_ON((event & (PPS_CAPTUREASSERT | PPS_CAPTURECLEAR)) == 0);
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index 76b41853a877..1269fbd2deca 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -77,9 +77,9 @@ rio_read_config(struct file *filp, struct kobject *kobj,
/* Several chips lock up trying to read undefined config space */
if (capable(CAP_SYS_ADMIN))
- size = 0x200000;
+ size = RIO_MAINT_SPACE_SZ;
- if (off > size)
+ if (off >= size)
return 0;
if (off + count > size) {
size -= off;
@@ -147,10 +147,10 @@ rio_write_config(struct file *filp, struct kobject *kobj,
loff_t init_off = off;
u8 *data = (u8 *) buf;
- if (off > 0x200000)
+ if (off >= RIO_MAINT_SPACE_SZ)
return 0;
- if (off + count > 0x200000) {
- size = 0x200000 - off;
+ if (off + count > RIO_MAINT_SPACE_SZ) {
+ size = RIO_MAINT_SPACE_SZ - off;
count = size;
}
@@ -200,7 +200,7 @@ static struct bin_attribute rio_config_attr = {
.name = "config",
.mode = S_IRUGO | S_IWUSR,
},
- .size = 0x200000,
+ .size = RIO_MAINT_SPACE_SZ,
.read = rio_read_config,
.write = rio_write_config,
};
diff --git a/drivers/regulator/mc13xxx-regulator-core.c b/drivers/regulator/mc13xxx-regulator-core.c
index f53d31b950d4..2bb5de1f2421 100644
--- a/drivers/regulator/mc13xxx-regulator-core.c
+++ b/drivers/regulator/mc13xxx-regulator-core.c
@@ -174,7 +174,7 @@ static int mc13xxx_regulator_get_voltage(struct regulator_dev *rdev)
dev_dbg(rdev_get_dev(rdev), "%s id: %d val: %d\n", __func__, id, val);
- BUG_ON(val < 0 || val > mc13xxx_regulators[id].desc.n_voltages);
+ BUG_ON(val > mc13xxx_regulators[id].desc.n_voltages);
return mc13xxx_regulators[id].voltages[val];
}
diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c
index 8b0d2c4bde91..06df898842c0 100644
--- a/drivers/regulator/wm831x-dcdc.c
+++ b/drivers/regulator/wm831x-dcdc.c
@@ -120,6 +120,7 @@ static unsigned int wm831x_dcdc_get_mode(struct regulator_dev *rdev)
return REGULATOR_MODE_IDLE;
default:
BUG();
+ return -EINVAL;
}
}
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index cdd97192dc69..4941cade319f 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -97,6 +97,18 @@ config RTC_INTF_DEV
If unsure, say Y.
+config RTC_INTF_DEV_UIE_EMUL
+ bool "RTC UIE emulation on dev interface"
+ depends on RTC_INTF_DEV
+ help
+ Provides an emulation for RTC_UIE if the underlying rtc chip
+ driver does not expose RTC_UIE ioctls. Those requests generate
+ once-per-second update interrupts, used for synchronization.
+
+ The emulation code will read the time from the hardware
+ clock several times per second, please enable this option
+ only if you know that you really need it.
+
config RTC_DRV_TEST
tristate "Test driver/device"
help
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index c404b61386bf..09b4437b3e61 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -117,6 +117,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
struct module *owner)
{
struct rtc_device *rtc;
+ struct rtc_wkalrm alrm;
int id, err;
if (idr_pre_get(&rtc_idr, GFP_KERNEL) == 0) {
@@ -166,6 +167,12 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
rtc->pie_timer.function = rtc_pie_update_irq;
rtc->pie_enabled = 0;
+ /* Check to see if there is an ALARM already set in hw */
+ err = __rtc_read_alarm(rtc, &alrm);
+
+ if (!err && !rtc_valid_tm(&alrm.time))
+ rtc_set_alarm(rtc, &alrm);
+
strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
dev_set_name(&rtc->dev, "rtc%d", id);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index a0c01967244d..8ec6b069a7f5 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -116,6 +116,186 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
}
EXPORT_SYMBOL_GPL(rtc_set_mmss);
+static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+ int err;
+
+ err = mutex_lock_interruptible(&rtc->ops_lock);
+ if (err)
+ return err;
+
+ if (rtc->ops == NULL)
+ err = -ENODEV;
+ else if (!rtc->ops->read_alarm)
+ err = -EINVAL;
+ else {
+ memset(alarm, 0, sizeof(struct rtc_wkalrm));
+ err = rtc->ops->read_alarm(rtc->dev.parent, alarm);
+ }
+
+ mutex_unlock(&rtc->ops_lock);
+ return err;
+}
+
+int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+ int err;
+ struct rtc_time before, now;
+ int first_time = 1;
+ unsigned long t_now, t_alm;
+ enum { none, day, month, year } missing = none;
+ unsigned days;
+
+ /* The lower level RTC driver may return -1 in some fields,
+ * creating invalid alarm->time values, for reasons like:
+ *
+ * - The hardware may not be capable of filling them in;
+ * many alarms match only on time-of-day fields, not
+ * day/month/year calendar data.
+ *
+ * - Some hardware uses illegal values as "wildcard" match
+ * values, which non-Linux firmware (like a BIOS) may try
+ * to set up as e.g. "alarm 15 minutes after each hour".
+ * Linux uses only oneshot alarms.
+ *
+ * When we see that here, we deal with it by using values from
+ * a current RTC timestamp for any missing (-1) values. The
+ * RTC driver prevents "periodic alarm" modes.
+ *
+ * But this can be racy, because some fields of the RTC timestamp
+ * may have wrapped in the interval since we read the RTC alarm,
+ * which would lead to us inserting inconsistent values in place
+ * of the -1 fields.
+ *
+ * Reading the alarm and timestamp in the reverse sequence
+ * would have the same race condition, and not solve the issue.
+ *
+ * So, we must first read the RTC timestamp,
+ * then read the RTC alarm value,
+ * and then read a second RTC timestamp.
+ *
+ * If any fields of the second timestamp have changed
+ * when compared with the first timestamp, then we know
+ * our timestamp may be inconsistent with that used by
+ * the low-level rtc_read_alarm_internal() function.
+ *
+ * So, when the two timestamps disagree, we just loop and do
+ * the process again to get a fully consistent set of values.
+ *
+ * This could all instead be done in the lower level driver,
+ * but since more than one lower level RTC implementation needs it,
+ * then it's probably best to do it here instead of there.
+ */
+
+ /* Get the "before" timestamp */
+ err = rtc_read_time(rtc, &before);
+ if (err < 0)
+ return err;
+ do {
+ if (!first_time)
+ memcpy(&before, &now, sizeof(struct rtc_time));
+ first_time = 0;
+
+ /* get the RTC alarm values, which may be incomplete */
+ err = rtc_read_alarm_internal(rtc, alarm);
+ if (err)
+ return err;
+
+ /* full-function RTCs won't have such missing fields */
+ if (rtc_valid_tm(&alarm->time) == 0)
+ return 0;
+
+ /* get the "after" timestamp, to detect wrapped fields */
+ err = rtc_read_time(rtc, &now);
+ if (err < 0)
+ return err;
+
+ /* note that tm_sec is a "don't care" value here: */
+ } while ( before.tm_min != now.tm_min
+ || before.tm_hour != now.tm_hour
+ || before.tm_mon != now.tm_mon
+ || before.tm_year != now.tm_year);
+
+ /* Fill in the missing alarm fields using the timestamp; we
+ * know there's at least one since alarm->time is invalid.
+ */
+ if (alarm->time.tm_sec == -1)
+ alarm->time.tm_sec = now.tm_sec;
+ if (alarm->time.tm_min == -1)
+ alarm->time.tm_min = now.tm_min;
+ if (alarm->time.tm_hour == -1)
+ alarm->time.tm_hour = now.tm_hour;
+
+ /* For simplicity, only support date rollover for now */
+ if (alarm->time.tm_mday == -1) {
+ alarm->time.tm_mday = now.tm_mday;
+ missing = day;
+ }
+ if (alarm->time.tm_mon == -1) {
+ alarm->time.tm_mon = now.tm_mon;
+ if (missing == none)
+ missing = month;
+ }
+ if (alarm->time.tm_year == -1) {
+ alarm->time.tm_year = now.tm_year;
+ if (missing == none)
+ missing = year;
+ }
+
+ /* with luck, no rollover is needed */
+ rtc_tm_to_time(&now, &t_now);
+ rtc_tm_to_time(&alarm->time, &t_alm);
+ if (t_now < t_alm)
+ goto done;
+
+ switch (missing) {
+
+ /* 24 hour rollover ... if it's now 10am Monday, an alarm that
+ * will trigger at 5am will do so at 5am Tuesday, which
+ * could also be in the next month or year. This is a common
+ * case, especially for PCs.
+ */
+ case day:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day");
+ t_alm += 24 * 60 * 60;
+ rtc_time_to_tm(t_alm, &alarm->time);
+ break;
+
+ /* Month rollover ... if it's the 31st, an alarm on the 3rd will
+ * be next month. An alarm matching on the 30th, 29th, or 28th
+ * may end up in the month after that! Many newer PCs support
+ * this type of alarm.
+ */
+ case month:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month");
+ do {
+ if (alarm->time.tm_mon < 11)
+ alarm->time.tm_mon++;
+ else {
+ alarm->time.tm_mon = 0;
+ alarm->time.tm_year++;
+ }
+ days = rtc_month_days(alarm->time.tm_mon,
+ alarm->time.tm_year);
+ } while (days < alarm->time.tm_mday);
+ break;
+
+ /* Year rollover ... easy except for leap years! */
+ case year:
+ dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year");
+ do {
+ alarm->time.tm_year++;
+ } while (rtc_valid_tm(&alarm->time) != 0);
+ break;
+
+ default:
+ dev_warn(&rtc->dev, "alarm rollover not handled\n");
+ }
+
+done:
+ return 0;
+}
+
int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
{
int err;
@@ -209,9 +389,8 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
}
if (err)
- return err;
-
- if (!rtc->ops)
+ /* nothing */;
+ else if (!rtc->ops)
err = -ENODEV;
else if (!rtc->ops->alarm_irq_enable)
err = -EINVAL;
@@ -229,6 +408,12 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
if (err)
return err;
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+ if (enabled == 0 && rtc->uie_irq_active) {
+ mutex_unlock(&rtc->ops_lock);
+ return rtc_dev_update_irq_enable_emul(rtc, 0);
+ }
+#endif
/* make sure we're changing state */
if (rtc->uie_rtctimer.enabled == enabled)
goto out;
@@ -248,6 +433,16 @@ int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled)
out:
mutex_unlock(&rtc->ops_lock);
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+ /*
+ * Enable emulation if the driver did not provide
+ * the update_irq_enable function pointer or if it returned
+ * -EINVAL to signal that it has been configured without
+ * interrupts or that they are not available at the moment.
+ */
+ if (err == -EINVAL)
+ err = rtc_dev_update_irq_enable_emul(rtc, enabled);
+#endif
return err;
}
@@ -263,7 +458,7 @@ EXPORT_SYMBOL_GPL(rtc_update_irq_enable);
*
* Triggers the registered irq_task function callback.
*/
-static void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
+void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode)
{
unsigned long flags;
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 26d1cf5d19ae..518a76ec71ca 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -183,33 +183,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return 0;
}
-/*
- * Handle commands from user-space
- */
-static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- int ret = 0;
-
- pr_debug("%s(): cmd=%08x, arg=%08lx.\n", __func__, cmd, arg);
-
- /* important: scrub old status before enabling IRQs */
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- at91_sys_write(AT91_RTC_IDR, AT91_RTC_SECEV);
- break;
- case RTC_UIE_ON: /* update on */
- at91_sys_write(AT91_RTC_SCCR, AT91_RTC_SECEV);
- at91_sys_write(AT91_RTC_IER, AT91_RTC_SECEV);
- break;
- default:
- ret = -ENOIOCTLCMD;
- break;
- }
-
- return ret;
-}
-
static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
pr_debug("%s(): cmd=%08x\n", __func__, enabled);
@@ -269,7 +242,6 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
}
static const struct rtc_class_ops at91_rtc_ops = {
- .ioctl = at91_rtc_ioctl,
.read_time = at91_rtc_readtime,
.set_time = at91_rtc_settime,
.read_alarm = at91_rtc_readalarm,
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index c36749e4c926..a3ad957507dc 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -216,33 +216,6 @@ static int at91_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return 0;
}
-/*
- * Handle commands from user-space
- */
-static int at91_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- struct sam9_rtc *rtc = dev_get_drvdata(dev);
- int ret = 0;
- u32 mr = rtt_readl(rtc, MR);
-
- dev_dbg(dev, "ioctl: cmd=%08x, arg=%08lx, mr %08x\n", cmd, arg, mr);
-
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN);
- break;
- case RTC_UIE_ON: /* update on */
- rtt_writel(rtc, MR, mr | AT91_RTT_RTTINCIEN);
- break;
- default:
- ret = -ENOIOCTLCMD;
- break;
- }
-
- return ret;
-}
-
static int at91_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct sam9_rtc *rtc = dev_get_drvdata(dev);
@@ -303,13 +276,12 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc)
}
static const struct rtc_class_ops at91_rtc_ops = {
- .ioctl = at91_rtc_ioctl,
.read_time = at91_rtc_readtime,
.set_time = at91_rtc_settime,
.read_alarm = at91_rtc_readalarm,
.set_alarm = at91_rtc_setalarm,
.proc = at91_rtc_proc,
- .alarm_irq_enabled = at91_rtc_alarm_irq_enable,
+ .alarm_irq_enable = at91_rtc_alarm_irq_enable,
};
/*
diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c
index 17971d93354d..ca9cff85ab8a 100644
--- a/drivers/rtc/rtc-bfin.c
+++ b/drivers/rtc/rtc-bfin.c
@@ -240,32 +240,6 @@ static void bfin_rtc_int_set_alarm(struct bfin_rtc *rtc)
*/
bfin_rtc_int_set(rtc->rtc_alarm.tm_yday == -1 ? RTC_ISTAT_ALARM : RTC_ISTAT_ALARM_DAY);
}
-static int bfin_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct bfin_rtc *rtc = dev_get_drvdata(dev);
- int ret = 0;
-
- dev_dbg_stamp(dev);
-
- bfin_rtc_sync_pending(dev);
-
- switch (cmd) {
- case RTC_UIE_ON:
- dev_dbg_stamp(dev);
- bfin_rtc_int_set(RTC_ISTAT_SEC);
- break;
- case RTC_UIE_OFF:
- dev_dbg_stamp(dev);
- bfin_rtc_int_clear(~RTC_ISTAT_SEC);
- break;
-
- default:
- dev_dbg_stamp(dev);
- ret = -ENOIOCTLCMD;
- }
-
- return ret;
-}
static int bfin_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -358,7 +332,6 @@ static int bfin_rtc_proc(struct device *dev, struct seq_file *seq)
}
static struct rtc_class_ops bfin_rtc_ops = {
- .ioctl = bfin_rtc_ioctl,
.read_time = bfin_rtc_read_time,
.set_time = bfin_rtc_set_time,
.read_alarm = bfin_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index c7ff8df347e7..911e75cdc125 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -37,6 +37,8 @@
#include <linux/mod_devicetable.h>
#include <linux/log2.h>
#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
/* this is for "generic access to PC-style RTC" using CMOS_READ/CMOS_WRITE */
#include <asm-generic/rtc.h>
@@ -375,50 +377,6 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
-static int cmos_irq_set_freq(struct device *dev, int freq)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- int f;
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -ENXIO;
-
- if (!is_power_of_2(freq))
- return -EINVAL;
- /* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
- f = ffs(freq);
- if (f-- > 16)
- return -EINVAL;
- f = 16 - f;
-
- spin_lock_irqsave(&rtc_lock, flags);
- hpet_set_periodic_freq(freq);
- CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
- spin_unlock_irqrestore(&rtc_lock, flags);
-
- return 0;
-}
-
-static int cmos_irq_set_state(struct device *dev, int enabled)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -ENXIO;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- cmos_irq_enable(cmos, RTC_PIE);
- else
- cmos_irq_disable(cmos, RTC_PIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
@@ -438,25 +396,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int cmos_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct cmos_rtc *cmos = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!is_valid_irq(cmos->irq))
- return -EINVAL;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- cmos_irq_enable(cmos, RTC_UIE);
- else
- cmos_irq_disable(cmos, RTC_UIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
static int cmos_procfs(struct device *dev, struct seq_file *seq)
@@ -501,10 +440,7 @@ static const struct rtc_class_ops cmos_rtc_ops = {
.read_alarm = cmos_read_alarm,
.set_alarm = cmos_set_alarm,
.proc = cmos_procfs,
- .irq_set_freq = cmos_irq_set_freq,
- .irq_set_state = cmos_irq_set_state,
.alarm_irq_enable = cmos_alarm_irq_enable,
- .update_irq_enable = cmos_update_irq_enable,
};
/*----------------------------------------------------------------*/
@@ -1123,6 +1059,47 @@ static struct pnp_driver cmos_pnp_driver = {
#endif /* CONFIG_PNP */
+#ifdef CONFIG_OF
+static const struct of_device_id of_cmos_match[] = {
+ {
+ .compatible = "motorola,mc146818",
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(of, of_cmos_match);
+
+static __init void cmos_of_init(struct platform_device *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct rtc_time time;
+ int ret;
+ const __be32 *val;
+
+ if (!node)
+ return;
+
+ val = of_get_property(node, "ctrl-reg", NULL);
+ if (val)
+ CMOS_WRITE(be32_to_cpup(val), RTC_CONTROL);
+
+ val = of_get_property(node, "freq-reg", NULL);
+ if (val)
+ CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
+
+ get_rtc_time(&time);
+ ret = rtc_valid_tm(&time);
+ if (ret) {
+ struct rtc_time def_time = {
+ .tm_year = 1,
+ .tm_mday = 1,
+ };
+ set_rtc_time(&def_time);
+ }
+}
+#else
+static inline void cmos_of_init(struct platform_device *pdev) {}
+#define of_cmos_match NULL
+#endif
/*----------------------------------------------------------------*/
/* Platform setup should have set up an RTC device, when PNP is
@@ -1131,6 +1108,7 @@ static struct pnp_driver cmos_pnp_driver = {
static int __init cmos_platform_probe(struct platform_device *pdev)
{
+ cmos_of_init(pdev);
cmos_wake_setup(&pdev->dev);
return cmos_do_probe(&pdev->dev,
platform_get_resource(pdev, IORESOURCE_IO, 0),
@@ -1162,6 +1140,7 @@ static struct platform_driver cmos_platform_driver = {
#ifdef CONFIG_PM
.pm = &cmos_pm_ops,
#endif
+ .of_match_table = of_cmos_match,
}
};
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 34647fc1ee98..8d46838dff8a 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -231,10 +231,6 @@ davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
case RTC_WIE_OFF:
rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN;
break;
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- ret = -ENOTTY;
- break;
default:
ret = -ENOIOCTLCMD;
}
@@ -473,55 +469,6 @@ static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
return 0;
}
-static int davinci_rtc_irq_set_state(struct device *dev, int enabled)
-{
- struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
- unsigned long flags;
- u8 rtc_ctrl;
-
- spin_lock_irqsave(&davinci_rtc_lock, flags);
-
- rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
-
- if (enabled) {
- while (rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL)
- & PRTCSS_RTC_CTRL_WDTBUS)
- cpu_relax();
-
- rtc_ctrl |= PRTCSS_RTC_CTRL_TE;
- rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-
- rtcss_write(davinci_rtc, 0x0, PRTCSS_RTC_CLKC_CNT);
-
- rtc_ctrl |= PRTCSS_RTC_CTRL_TIEN |
- PRTCSS_RTC_CTRL_TMMD |
- PRTCSS_RTC_CTRL_TMRFLG;
- } else
- rtc_ctrl &= ~PRTCSS_RTC_CTRL_TIEN;
-
- rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-
- spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
- return 0;
-}
-
-static int davinci_rtc_irq_set_freq(struct device *dev, int freq)
-{
- struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
- unsigned long flags;
- u16 tmr_counter = (0x8000 >> (ffs(freq) - 1));
-
- spin_lock_irqsave(&davinci_rtc_lock, flags);
-
- rtcss_write(davinci_rtc, tmr_counter & 0xFF, PRTCSS_RTC_TMR0);
- rtcss_write(davinci_rtc, (tmr_counter & 0xFF00) >> 8, PRTCSS_RTC_TMR1);
-
- spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
- return 0;
-}
-
static struct rtc_class_ops davinci_rtc_ops = {
.ioctl = davinci_rtc_ioctl,
.read_time = davinci_rtc_read_time,
@@ -529,8 +476,6 @@ static struct rtc_class_ops davinci_rtc_ops = {
.alarm_irq_enable = davinci_rtc_alarm_irq_enable,
.read_alarm = davinci_rtc_read_alarm,
.set_alarm = davinci_rtc_set_alarm,
- .irq_set_state = davinci_rtc_irq_set_state,
- .irq_set_freq = davinci_rtc_irq_set_freq,
};
static int __init davinci_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 37c3cc1b3dd5..d0e06edb14c5 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -46,6 +46,105 @@ static int rtc_dev_open(struct inode *inode, struct file *file)
return err;
}
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+/*
+ * Routine to poll RTC seconds field for change as often as possible,
+ * after first RTC_UIE use timer to reduce polling
+ */
+static void rtc_uie_task(struct work_struct *work)
+{
+ struct rtc_device *rtc =
+ container_of(work, struct rtc_device, uie_task);
+ struct rtc_time tm;
+ int num = 0;
+ int err;
+
+ err = rtc_read_time(rtc, &tm);
+
+ spin_lock_irq(&rtc->irq_lock);
+ if (rtc->stop_uie_polling || err) {
+ rtc->uie_task_active = 0;
+ } else if (rtc->oldsecs != tm.tm_sec) {
+ num = (tm.tm_sec + 60 - rtc->oldsecs) % 60;
+ rtc->oldsecs = tm.tm_sec;
+ rtc->uie_timer.expires = jiffies + HZ - (HZ/10);
+ rtc->uie_timer_active = 1;
+ rtc->uie_task_active = 0;
+ add_timer(&rtc->uie_timer);
+ } else if (schedule_work(&rtc->uie_task) == 0) {
+ rtc->uie_task_active = 0;
+ }
+ spin_unlock_irq(&rtc->irq_lock);
+ if (num)
+ rtc_handle_legacy_irq(rtc, num, RTC_UF);
+}
+static void rtc_uie_timer(unsigned long data)
+{
+ struct rtc_device *rtc = (struct rtc_device *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc->irq_lock, flags);
+ rtc->uie_timer_active = 0;
+ rtc->uie_task_active = 1;
+ if ((schedule_work(&rtc->uie_task) == 0))
+ rtc->uie_task_active = 0;
+ spin_unlock_irqrestore(&rtc->irq_lock, flags);
+}
+
+static int clear_uie(struct rtc_device *rtc)
+{
+ spin_lock_irq(&rtc->irq_lock);
+ if (rtc->uie_irq_active) {
+ rtc->stop_uie_polling = 1;
+ if (rtc->uie_timer_active) {
+ spin_unlock_irq(&rtc->irq_lock);
+ del_timer_sync(&rtc->uie_timer);
+ spin_lock_irq(&rtc->irq_lock);
+ rtc->uie_timer_active = 0;
+ }
+ if (rtc->uie_task_active) {
+ spin_unlock_irq(&rtc->irq_lock);
+ flush_scheduled_work();
+ spin_lock_irq(&rtc->irq_lock);
+ }
+ rtc->uie_irq_active = 0;
+ }
+ spin_unlock_irq(&rtc->irq_lock);
+ return 0;
+}
+
+static int set_uie(struct rtc_device *rtc)
+{
+ struct rtc_time tm;
+ int err;
+
+ err = rtc_read_time(rtc, &tm);
+ if (err)
+ return err;
+ spin_lock_irq(&rtc->irq_lock);
+ if (!rtc->uie_irq_active) {
+ rtc->uie_irq_active = 1;
+ rtc->stop_uie_polling = 0;
+ rtc->oldsecs = tm.tm_sec;
+ rtc->uie_task_active = 1;
+ if (schedule_work(&rtc->uie_task) == 0)
+ rtc->uie_task_active = 0;
+ }
+ rtc->irq_data = 0;
+ spin_unlock_irq(&rtc->irq_lock);
+ return 0;
+}
+
+int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc, unsigned int enabled)
+{
+ if (enabled)
+ return set_uie(rtc);
+ else
+ return clear_uie(rtc);
+}
+EXPORT_SYMBOL(rtc_dev_update_irq_enable_emul);
+
+#endif /* CONFIG_RTC_INTF_DEV_UIE_EMUL */
static ssize_t
rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
@@ -387,6 +486,11 @@ void rtc_dev_prepare(struct rtc_device *rtc)
rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id);
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+ INIT_WORK(&rtc->uie_task, rtc_uie_task);
+ setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
+#endif
+
cdev_init(&rtc->char_dev, &rtc_dev_fops);
rtc->char_dev.owner = rtc->owner;
}
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 37268e97de49..3fffd708711f 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -397,29 +397,12 @@ static int ds1511_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds1511_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct platform_device *pdev = to_platform_device(dev);
- struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
- if (pdata->irq <= 0)
- return -EINVAL;
- if (enabled)
- pdata->irqen |= RTC_UF;
- else
- pdata->irqen &= ~RTC_UF;
- ds1511_rtc_update_alarm(pdata);
- return 0;
-}
-
static const struct rtc_class_ops ds1511_rtc_ops = {
.read_time = ds1511_rtc_read_time,
.set_time = ds1511_rtc_set_time,
.read_alarm = ds1511_rtc_read_alarm,
.set_alarm = ds1511_rtc_set_alarm,
.alarm_irq_enable = ds1511_rtc_alarm_irq_enable,
- .update_irq_enable = ds1511_rtc_update_irq_enable,
};
static ssize_t
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index ff432e2ca275..fee41b97c9e8 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -227,29 +227,12 @@ static int ds1553_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds1553_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct platform_device *pdev = to_platform_device(dev);
- struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
- if (pdata->irq <= 0)
- return -EINVAL;
- if (enabled)
- pdata->irqen |= RTC_UF;
- else
- pdata->irqen &= ~RTC_UF;
- ds1553_rtc_update_alarm(pdata);
- return 0;
-}
-
static const struct rtc_class_ops ds1553_rtc_ops = {
.read_time = ds1553_rtc_read_time,
.set_time = ds1553_rtc_set_time,
.read_alarm = ds1553_rtc_read_alarm,
.set_alarm = ds1553_rtc_set_alarm,
.alarm_irq_enable = ds1553_rtc_alarm_irq_enable,
- .update_irq_enable = ds1553_rtc_update_irq_enable,
};
static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 23a9ee19764c..27b7bf672ac6 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -1,7 +1,7 @@
/*
* RTC client/driver for the Maxim/Dallas DS3232 Real-Time Clock over I2C
*
- * Copyright (C) 2009-2010 Freescale Semiconductor.
+ * Copyright (C) 2009-2011 Freescale Semiconductor.
* Author: Jack Lan <jack.lan@freescale.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -141,9 +141,11 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
time->tm_hour = bcd2bin(hour);
}
- time->tm_wday = bcd2bin(week);
+ /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
+ time->tm_wday = bcd2bin(week) - 1;
time->tm_mday = bcd2bin(day);
- time->tm_mon = bcd2bin(month & 0x7F);
+ /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
+ time->tm_mon = bcd2bin(month & 0x7F) - 1;
if (century)
add_century = 100;
@@ -162,9 +164,11 @@ static int ds3232_set_time(struct device *dev, struct rtc_time *time)
buf[0] = bin2bcd(time->tm_sec);
buf[1] = bin2bcd(time->tm_min);
buf[2] = bin2bcd(time->tm_hour);
- buf[3] = bin2bcd(time->tm_wday); /* Day of the week */
+ /* Day of the week in linux range is 0~6 while 1~7 in RTC chip */
+ buf[3] = bin2bcd(time->tm_wday + 1);
buf[4] = bin2bcd(time->tm_mday); /* Date */
- buf[5] = bin2bcd(time->tm_mon);
+ /* linux tm_mon range:0~11, while month range is 1~12 in RTC chip */
+ buf[5] = bin2bcd(time->tm_mon + 1);
if (time->tm_year >= 100) {
buf[5] |= 0x80;
buf[6] = bin2bcd(time->tm_year - 100);
@@ -335,23 +339,6 @@ static int ds3232_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int ds3232_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct ds3232 *ds3232 = i2c_get_clientdata(client);
-
- if (client->irq <= 0)
- return -EINVAL;
-
- if (enabled)
- ds3232->rtc->irq_data |= RTC_UF;
- else
- ds3232->rtc->irq_data &= ~RTC_UF;
-
- ds3232_update_alarm(client);
- return 0;
-}
-
static irqreturn_t ds3232_irq(int irq, void *dev_id)
{
struct i2c_client *client = dev_id;
@@ -402,7 +389,6 @@ static const struct rtc_class_ops ds3232_rtc_ops = {
.read_alarm = ds3232_read_alarm,
.set_alarm = ds3232_set_alarm,
.alarm_irq_enable = ds3232_alarm_irq_enable,
- .update_irq_enable = ds3232_update_irq_enable,
};
static int __devinit ds3232_probe(struct i2c_client *client,
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index 2e16f72c9056..b6473631d182 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -168,12 +168,6 @@ static int jz4740_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
return ret;
}
-static int jz4740_rtc_update_irq_enable(struct device *dev, unsigned int enable)
-{
- struct jz4740_rtc *rtc = dev_get_drvdata(dev);
- return jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_1HZ_IRQ, enable);
-}
-
static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
{
struct jz4740_rtc *rtc = dev_get_drvdata(dev);
@@ -185,7 +179,6 @@ static struct rtc_class_ops jz4740_rtc_ops = {
.set_mmss = jz4740_rtc_set_mmss,
.read_alarm = jz4740_rtc_read_alarm,
.set_alarm = jz4740_rtc_set_alarm,
- .update_irq_enable = jz4740_rtc_update_irq_enable,
.alarm_irq_enable = jz4740_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 5314b153bfba..c42006469559 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -282,12 +282,6 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
return IRQ_HANDLED;
}
-static int mc13xxx_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- return mc13xxx_rtc_irq_enable(dev, enabled, MC13XXX_IRQ_1HZ);
-}
-
static int mc13xxx_rtc_alarm_irq_enable(struct device *dev,
unsigned int enabled)
{
@@ -300,7 +294,6 @@ static const struct rtc_class_ops mc13xxx_rtc_ops = {
.read_alarm = mc13xxx_rtc_read_alarm,
.set_alarm = mc13xxx_rtc_set_alarm,
.alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
- .update_irq_enable = mc13xxx_rtc_update_irq_enable,
};
static irqreturn_t mc13xxx_rtc_reset_handler(int irq, void *dev)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index dfcdf0901d21..b40c1ff1ebc8 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -240,32 +240,12 @@ static int mpc5121_rtc_alarm_irq_enable(struct device *dev,
return 0;
}
-static int mpc5121_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct mpc5121_rtc_data *rtc = dev_get_drvdata(dev);
- struct mpc5121_rtc_regs __iomem *regs = rtc->regs;
- int val;
-
- val = in_8(&regs->int_enable);
-
- if (enabled)
- val = (val & ~0x8) | 0x1;
- else
- val &= ~0x1;
-
- out_8(&regs->int_enable, val);
-
- return 0;
-}
-
static const struct rtc_class_ops mpc5121_rtc_ops = {
.read_time = mpc5121_rtc_read_time,
.set_time = mpc5121_rtc_set_time,
.read_alarm = mpc5121_rtc_read_alarm,
.set_alarm = mpc5121_rtc_set_alarm,
.alarm_irq_enable = mpc5121_rtc_alarm_irq_enable,
- .update_irq_enable = mpc5121_rtc_update_irq_enable,
};
static int __devinit mpc5121_rtc_probe(struct platform_device *op,
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 1db62db8469d..b86bc328463b 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -62,6 +62,17 @@ static inline int is_intr(u8 rtc_intr)
return rtc_intr & RTC_IRQMASK;
}
+static inline unsigned char vrtc_is_updating(void)
+{
+ unsigned char uip;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ uip = (vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return uip;
+}
+
/*
* rtc_time's year contains the increment over 1900, but vRTC's YEAR
* register can't be programmed to value larger than 0x64, so vRTC
@@ -76,7 +87,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
{
unsigned long flags;
- if (rtc_is_updating())
+ if (vrtc_is_updating())
mdelay(20);
spin_lock_irqsave(&rtc_lock, flags);
@@ -236,25 +247,6 @@ static int mrst_set_alarm(struct device *dev, struct rtc_wkalrm *t)
return 0;
}
-static int mrst_irq_set_state(struct device *dev, int enabled)
-{
- struct mrst_rtc *mrst = dev_get_drvdata(dev);
- unsigned long flags;
-
- if (!mrst->irq)
- return -ENXIO;
-
- spin_lock_irqsave(&rtc_lock, flags);
-
- if (enabled)
- mrst_irq_enable(mrst, RTC_PIE);
- else
- mrst_irq_disable(mrst, RTC_PIE);
-
- spin_unlock_irqrestore(&rtc_lock, flags);
- return 0;
-}
-
/* Currently, the vRTC doesn't support UIE ON/OFF */
static int mrst_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -301,7 +293,6 @@ static const struct rtc_class_ops mrst_rtc_ops = {
.read_alarm = mrst_read_alarm,
.set_alarm = mrst_set_alarm,
.proc = mrst_procfs,
- .irq_set_state = mrst_irq_set_state,
.alarm_irq_enable = mrst_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 0b06c1e03fd5..826ab64a8fa9 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -274,12 +274,6 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int mxc_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- mxc_rtc_irq_enable(dev, RTC_1HZ_BIT, enabled);
- return 0;
-}
-
/*
* This function reads the current RTC time into tm in Gregorian date.
*/
@@ -368,7 +362,6 @@ static struct rtc_class_ops mxc_rtc_ops = {
.read_alarm = mxc_rtc_read_alarm,
.set_alarm = mxc_rtc_set_alarm,
.alarm_irq_enable = mxc_rtc_alarm_irq_enable,
- .update_irq_enable = mxc_rtc_update_irq_enable,
};
static int __init mxc_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index ddb0857e15a4..781068d62f23 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -134,20 +134,6 @@ static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
gettm->bcd_hour = bin2bcd(settm->tm_hour) << 16;
}
-static int nuc900_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct nuc900_rtc *rtc = dev_get_drvdata(dev);
-
- if (enabled)
- __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)|
- (TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
- else
- __raw_writel(__raw_readl(rtc->rtc_reg + REG_RTC_RIER)&
- (~TICKINTENB), rtc->rtc_reg + REG_RTC_RIER);
-
- return 0;
-}
-
static int nuc900_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct nuc900_rtc *rtc = dev_get_drvdata(dev);
@@ -234,7 +220,6 @@ static struct rtc_class_ops nuc900_rtc_ops = {
.read_alarm = nuc900_rtc_read_alarm,
.set_alarm = nuc900_rtc_set_alarm,
.alarm_irq_enable = nuc900_alarm_irq_enable,
- .update_irq_enable = nuc900_update_irq_enable,
};
static int __devinit nuc900_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index b4dbf3a319b3..de0dd7b1f146 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -135,44 +135,6 @@ static irqreturn_t rtc_irq(int irq, void *rtc)
return IRQ_HANDLED;
}
-#ifdef CONFIG_RTC_INTF_DEV
-
-static int
-omap_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- u8 reg;
-
- switch (cmd) {
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- break;
- default:
- return -ENOIOCTLCMD;
- }
-
- local_irq_disable();
- rtc_wait_not_busy();
- reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
- switch (cmd) {
- /* UIE = Update Interrupt Enable (1/second) */
- case RTC_UIE_OFF:
- reg &= ~OMAP_RTC_INTERRUPTS_IT_TIMER;
- break;
- case RTC_UIE_ON:
- reg |= OMAP_RTC_INTERRUPTS_IT_TIMER;
- break;
- }
- rtc_wait_not_busy();
- rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
- local_irq_enable();
-
- return 0;
-}
-
-#else
-#define omap_rtc_ioctl NULL
-#endif
-
static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
u8 reg;
@@ -313,7 +275,6 @@ static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
}
static struct rtc_class_ops omap_rtc_ops = {
- .ioctl = omap_rtc_ioctl,
.read_time = omap_rtc_read_time,
.set_time = omap_rtc_set_time,
.read_alarm = omap_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index 25c0b3fd44f1..a633abc42896 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -131,18 +131,12 @@ static int pcap_rtc_alarm_irq_enable(struct device *dev, unsigned int en)
return pcap_rtc_irq_enable(dev, PCAP_IRQ_TODA, en);
}
-static int pcap_rtc_update_irq_enable(struct device *dev, unsigned int en)
-{
- return pcap_rtc_irq_enable(dev, PCAP_IRQ_1HZ, en);
-}
-
static const struct rtc_class_ops pcap_rtc_ops = {
.read_time = pcap_rtc_read_time,
.read_alarm = pcap_rtc_read_alarm,
.set_alarm = pcap_rtc_set_alarm,
.set_mmss = pcap_rtc_set_mmss,
.alarm_irq_enable = pcap_rtc_alarm_irq_enable,
- .update_irq_enable = pcap_rtc_update_irq_enable,
};
static int __devinit pcap_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 16edf94ab42f..f90c574f9d05 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -106,25 +106,6 @@ pcf50633_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int
-pcf50633_rtc_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct pcf50633_rtc *rtc = dev_get_drvdata(dev);
- int err;
-
- if (enabled)
- err = pcf50633_irq_unmask(rtc->pcf, PCF50633_IRQ_SECOND);
- else
- err = pcf50633_irq_mask(rtc->pcf, PCF50633_IRQ_SECOND);
-
- if (err < 0)
- return err;
-
- rtc->second_enabled = enabled;
-
- return 0;
-}
-
static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct pcf50633_rtc *rtc;
@@ -262,8 +243,7 @@ static struct rtc_class_ops pcf50633_rtc_ops = {
.set_time = pcf50633_rtc_set_time,
.read_alarm = pcf50633_rtc_read_alarm,
.set_alarm = pcf50633_rtc_set_alarm,
- .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
- .update_irq_enable = pcf50633_rtc_update_irq_enable,
+ .alarm_irq_enable = pcf50633_rtc_alarm_irq_enable,
};
static void pcf50633_rtc_irq(int irq, void *data)
diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c
index bbdb2f02798a..d554368c9f57 100644
--- a/drivers/rtc/rtc-pl030.c
+++ b/drivers/rtc/rtc-pl030.c
@@ -35,11 +35,6 @@ static irqreturn_t pl030_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int pl030_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- return -ENOIOCTLCMD;
-}
-
static int pl030_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
struct pl030_rtc *rtc = dev_get_drvdata(dev);
@@ -96,7 +91,6 @@ static int pl030_set_time(struct device *dev, struct rtc_time *tm)
}
static const struct rtc_class_ops pl030_ops = {
- .ioctl = pl030_ioctl,
.read_time = pl030_read_time,
.set_time = pl030_set_time,
.read_alarm = pl030_read_alarm,
diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index b7a6690e5b35..d829ea63c4fb 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -293,57 +293,6 @@ static int pl031_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
return ret;
}
-/* Periodic interrupt is only available in ST variants. */
-static int pl031_irq_set_state(struct device *dev, int enabled)
-{
- struct pl031_local *ldata = dev_get_drvdata(dev);
-
- if (enabled == 1) {
- /* Clear any pending timer interrupt. */
- writel(RTC_BIT_PI, ldata->base + RTC_ICR);
-
- writel(readl(ldata->base + RTC_IMSC) | RTC_BIT_PI,
- ldata->base + RTC_IMSC);
-
- /* Now start the timer */
- writel(readl(ldata->base + RTC_TCR) | RTC_TCR_EN,
- ldata->base + RTC_TCR);
-
- } else {
- writel(readl(ldata->base + RTC_IMSC) & (~RTC_BIT_PI),
- ldata->base + RTC_IMSC);
-
- /* Also stop the timer */
- writel(readl(ldata->base + RTC_TCR) & (~RTC_TCR_EN),
- ldata->base + RTC_TCR);
- }
- /* Wait at least 1 RTC32 clock cycle to ensure next access
- * to RTC_TCR will succeed.
- */
- udelay(40);
-
- return 0;
-}
-
-static int pl031_irq_set_freq(struct device *dev, int freq)
-{
- struct pl031_local *ldata = dev_get_drvdata(dev);
-
- /* Cant set timer if it is already enabled */
- if (readl(ldata->base + RTC_TCR) & RTC_TCR_EN) {
- dev_err(dev, "can't change frequency while timer enabled\n");
- return -EINVAL;
- }
-
- /* If self start bit in RTC_TCR is set timer will start here,
- * but we never set that bit. Instead we start the timer when
- * set_state is called with enabled == 1.
- */
- writel(RTC_TIMER_FREQ / freq, ldata->base + RTC_TLR);
-
- return 0;
-}
-
static int pl031_remove(struct amba_device *adev)
{
struct pl031_local *ldata = dev_get_drvdata(&adev->dev);
@@ -440,8 +389,6 @@ static struct rtc_class_ops stv1_pl031_ops = {
.read_alarm = pl031_read_alarm,
.set_alarm = pl031_set_alarm,
.alarm_irq_enable = pl031_alarm_irq_enable,
- .irq_set_state = pl031_irq_set_state,
- .irq_set_freq = pl031_irq_set_freq,
};
/* And the second ST derivative */
@@ -451,8 +398,6 @@ static struct rtc_class_ops stv2_pl031_ops = {
.read_alarm = pl031_stv2_read_alarm,
.set_alarm = pl031_stv2_set_alarm,
.alarm_irq_enable = pl031_alarm_irq_enable,
- .irq_set_state = pl031_irq_set_state,
- .irq_set_freq = pl031_irq_set_freq,
};
static struct amba_id pl031_ids[] = {
diff --git a/drivers/rtc/rtc-proc.c b/drivers/rtc/rtc-proc.c
index 242bbf86c74a..0a59fda5c09d 100644
--- a/drivers/rtc/rtc-proc.c
+++ b/drivers/rtc/rtc-proc.c
@@ -69,6 +69,14 @@ static int rtc_proc_show(struct seq_file *seq, void *offset)
alrm.enabled ? "yes" : "no");
seq_printf(seq, "alrm_pending\t: %s\n",
alrm.pending ? "yes" : "no");
+ seq_printf(seq, "update IRQ enabled\t: %s\n",
+ (rtc->uie_rtctimer.enabled) ? "yes" : "no");
+ seq_printf(seq, "periodic IRQ enabled\t: %s\n",
+ (rtc->pie_enabled) ? "yes" : "no");
+ seq_printf(seq, "periodic IRQ frequency\t: %d\n",
+ rtc->irq_freq);
+ seq_printf(seq, "max user IRQ frequency\t: %d\n",
+ rtc->max_user_freq);
}
seq_printf(seq, "24hr\t\t: yes\n");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 29e867a1aaa8..fc9f4991574b 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -209,32 +209,6 @@ static void pxa_rtc_release(struct device *dev)
free_irq(pxa_rtc->irq_1Hz, dev);
}
-static int pxa_periodic_irq_set_freq(struct device *dev, int freq)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
- int period_ms;
-
- if (freq < 1 || freq > MAXFREQ_PERIODIC)
- return -EINVAL;
-
- period_ms = 1000 / freq;
- rtc_writel(pxa_rtc, PIAR, period_ms);
-
- return 0;
-}
-
-static int pxa_periodic_irq_set_state(struct device *dev, int enabled)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
-
- if (enabled)
- rtsr_set_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
- else
- rtsr_clear_bits(pxa_rtc, RTSR_PIALE | RTSR_PICE);
-
- return 0;
-}
-
static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -250,21 +224,6 @@ static int pxa_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int pxa_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
-
- spin_lock_irq(&pxa_rtc->lock);
-
- if (enabled)
- rtsr_set_bits(pxa_rtc, RTSR_HZE);
- else
- rtsr_clear_bits(pxa_rtc, RTSR_HZE);
-
- spin_unlock_irq(&pxa_rtc->lock);
- return 0;
-}
-
static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -346,10 +305,7 @@ static const struct rtc_class_ops pxa_rtc_ops = {
.read_alarm = pxa_rtc_read_alarm,
.set_alarm = pxa_rtc_set_alarm,
.alarm_irq_enable = pxa_alarm_irq_enable,
- .update_irq_enable = pxa_update_irq_enable,
.proc = pxa_rtc_proc,
- .irq_set_state = pxa_periodic_irq_set_state,
- .irq_set_freq = pxa_periodic_irq_set_freq,
};
static int __init pxa_rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 6aaa1550e3b1..85c1b848dd72 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -281,57 +281,6 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
return rs5c372_set_datetime(to_i2c_client(dev), tm);
}
-#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
-
-static int
-rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct rs5c372 *rs5c = i2c_get_clientdata(client);
- unsigned char buf;
- int status, addr;
-
- buf = rs5c->regs[RS5C_REG_CTRL1];
- switch (cmd) {
- case RTC_UIE_OFF:
- case RTC_UIE_ON:
- /* some 327a modes use a different IRQ pin for 1Hz irqs */
- if (rs5c->type == rtc_rs5c372a
- && (buf & RS5C372A_CTRL1_SL1))
- return -ENOIOCTLCMD;
- default:
- return -ENOIOCTLCMD;
- }
-
- status = rs5c_get_regs(rs5c);
- if (status < 0)
- return status;
-
- addr = RS5C_ADDR(RS5C_REG_CTRL1);
- switch (cmd) {
- case RTC_UIE_OFF: /* update off */
- buf &= ~RS5C_CTRL1_CT_MASK;
- break;
- case RTC_UIE_ON: /* update on */
- buf &= ~RS5C_CTRL1_CT_MASK;
- buf |= RS5C_CTRL1_CT4;
- break;
- }
-
- if (i2c_smbus_write_byte_data(client, addr, buf) < 0) {
- printk(KERN_WARNING "%s: can't update alarm\n",
- rs5c->rtc->name);
- status = -EIO;
- } else
- rs5c->regs[RS5C_REG_CTRL1] = buf;
-
- return status;
-}
-
-#else
-#define rs5c_rtc_ioctl NULL
-#endif
-
static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
@@ -480,7 +429,6 @@ static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
static const struct rtc_class_ops rs5c372_rtc_ops = {
.proc = rs5c372_rtc_proc,
- .ioctl = rs5c_rtc_ioctl,
.read_time = rs5c372_rtc_read_time,
.set_time = rs5c372_rtc_set_time,
.read_alarm = rs5c_read_alarm,
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index af32a62e12a8..fde172fb2abe 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -424,37 +424,12 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int rx8025_irq_set_state(struct device *dev, int enabled)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct rx8025_data *rx8025 = i2c_get_clientdata(client);
- int ctrl1;
- int err;
-
- if (client->irq <= 0)
- return -ENXIO;
-
- ctrl1 = rx8025->ctrl1 & ~RX8025_BIT_CTRL1_CT;
- if (enabled)
- ctrl1 |= RX8025_BIT_CTRL1_CT_1HZ;
- if (ctrl1 != rx8025->ctrl1) {
- rx8025->ctrl1 = ctrl1;
- err = rx8025_write_reg(rx8025->client, RX8025_REG_CTRL1,
- rx8025->ctrl1);
- if (err)
- return err;
- }
-
- return 0;
-}
-
static struct rtc_class_ops rx8025_rtc_ops = {
.read_time = rx8025_get_time,
.set_time = rx8025_set_time,
.read_alarm = rx8025_read_alarm,
.set_alarm = rx8025_set_alarm,
.alarm_irq_enable = rx8025_alarm_irq_enable,
- .irq_set_state = rx8025_irq_set_state,
};
/*
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index cf953ecbfca9..714964913e5e 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -77,47 +77,18 @@ static irqreturn_t s3c_rtc_tickirq(int irq, void *id)
}
/* Update control registers */
-static void s3c_rtc_setaie(int to)
+static int s3c_rtc_setaie(struct device *dev, unsigned int enabled)
{
unsigned int tmp;
- pr_debug("%s: aie=%d\n", __func__, to);
+ pr_debug("%s: aie=%d\n", __func__, enabled);
tmp = readb(s3c_rtc_base + S3C2410_RTCALM) & ~S3C2410_RTCALM_ALMEN;
- if (to)
+ if (enabled)
tmp |= S3C2410_RTCALM_ALMEN;
writeb(tmp, s3c_rtc_base + S3C2410_RTCALM);
-}
-
-static int s3c_rtc_setpie(struct device *dev, int enabled)
-{
- unsigned int tmp;
-
- pr_debug("%s: pie=%d\n", __func__, enabled);
-
- spin_lock_irq(&s3c_rtc_pie_lock);
-
- if (s3c_rtc_cpu_type == TYPE_S3C64XX) {
- tmp = readw(s3c_rtc_base + S3C2410_RTCCON);
- tmp &= ~S3C64XX_RTCCON_TICEN;
-
- if (enabled)
- tmp |= S3C64XX_RTCCON_TICEN;
-
- writew(tmp, s3c_rtc_base + S3C2410_RTCCON);
- } else {
- tmp = readb(s3c_rtc_base + S3C2410_TICNT);
- tmp &= ~S3C2410_TICNT_ENABLE;
-
- if (enabled)
- tmp |= S3C2410_TICNT_ENABLE;
-
- writeb(tmp, s3c_rtc_base + S3C2410_TICNT);
- }
-
- spin_unlock_irq(&s3c_rtc_pie_lock);
return 0;
}
@@ -308,7 +279,7 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
writeb(alrm_en, base + S3C2410_RTCALM);
- s3c_rtc_setaie(alrm->enabled);
+ s3c_rtc_setaie(dev, alrm->enabled);
return 0;
}
@@ -377,8 +348,6 @@ static const struct rtc_class_ops s3c_rtcops = {
.set_time = s3c_rtc_settime,
.read_alarm = s3c_rtc_getalarm,
.set_alarm = s3c_rtc_setalarm,
- .irq_set_freq = s3c_rtc_setfreq,
- .irq_set_state = s3c_rtc_setpie,
.proc = s3c_rtc_proc,
.alarm_irq_enable = s3c_rtc_setaie,
};
@@ -440,7 +409,7 @@ static int __devexit s3c_rtc_remove(struct platform_device *dev)
rtc_device_unregister(rtc);
s3c_rtc_setpie(&dev->dev, 0);
- s3c_rtc_setaie(0);
+ s3c_rtc_setaie(&dev->dev, 0);
clk_disable(rtc_clk);
clk_put(rtc_clk);
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 5dfe5ffcb0d3..0b40bb88a884 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -43,7 +43,6 @@
#define RTC_DEF_TRIM 0
static const unsigned long RTC_FREQ = 1024;
-static unsigned long timer_freq;
static struct rtc_time rtc_alarm;
static DEFINE_SPINLOCK(sa1100_rtc_lock);
@@ -156,114 +155,11 @@ static irqreturn_t sa1100_rtc_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int sa1100_irq_set_freq(struct device *dev, int freq)
-{
- if (freq < 1 || freq > timer_freq) {
- return -EINVAL;
- } else {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- rtc->irq_freq = freq;
-
- return 0;
- }
-}
-
-static int rtc_timer1_count;
-
-static int sa1100_irq_set_state(struct device *dev, int enabled)
-{
- spin_lock_irq(&sa1100_rtc_lock);
- if (enabled) {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- OSMR1 = timer_freq / rtc->irq_freq + OSCR;
- OIER |= OIER_E1;
- rtc_timer1_count = 1;
- } else {
- OIER &= ~OIER_E1;
- }
- spin_unlock_irq(&sa1100_rtc_lock);
-
- return 0;
-}
-
-static inline int sa1100_timer1_retrigger(struct rtc_device *rtc)
-{
- unsigned long diff;
- unsigned long period = timer_freq / rtc->irq_freq;
-
- spin_lock_irq(&sa1100_rtc_lock);
-
- do {
- OSMR1 += period;
- diff = OSMR1 - OSCR;
- /* If OSCR > OSMR1, diff is a very large number (unsigned
- * math). This means we have a lost interrupt. */
- } while (diff > period);
- OIER |= OIER_E1;
-
- spin_unlock_irq(&sa1100_rtc_lock);
-
- return 0;
-}
-
-static irqreturn_t timer1_interrupt(int irq, void *dev_id)
-{
- struct platform_device *pdev = to_platform_device(dev_id);
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- /*
- * If we match for the first time, rtc_timer1_count will be 1.
- * Otherwise, we wrapped around (very unlikely but
- * still possible) so compute the amount of missed periods.
- * The match reg is updated only when the data is actually retrieved
- * to avoid unnecessary interrupts.
- */
- OSSR = OSSR_M1; /* clear match on timer1 */
-
- rtc_update_irq(rtc, rtc_timer1_count, RTC_PF | RTC_IRQF);
-
- if (rtc_timer1_count == 1)
- rtc_timer1_count =
- (rtc->irq_freq * ((1 << 30) / (timer_freq >> 2)));
-
- /* retrigger. */
- sa1100_timer1_retrigger(rtc);
-
- return IRQ_HANDLED;
-}
-
-static int sa1100_rtc_read_callback(struct device *dev, int data)
-{
- if (data & RTC_PF) {
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- /* interpolate missed periods and set match for the next */
- unsigned long period = timer_freq / rtc->irq_freq;
- unsigned long oscr = OSCR;
- unsigned long osmr1 = OSMR1;
- unsigned long missed = (oscr - osmr1)/period;
- data += missed << 8;
- OSSR = OSSR_M1; /* clear match on timer 1 */
- OSMR1 = osmr1 + (missed + 1)*period;
- /* Ensure we didn't miss another match in the mean time.
- * Here we compare (match - OSCR) 8 instead of 0 --
- * see comment in pxa_timer_interrupt() for explanation.
- */
- while ((signed long)((osmr1 = OSMR1) - OSCR) <= 8) {
- data += 0x100;
- OSSR = OSSR_M1; /* clear match on timer 1 */
- OSMR1 = osmr1 + period;
- }
- }
- return data;
-}
-
static int sa1100_rtc_open(struct device *dev)
{
int ret;
- struct rtc_device *rtc = (struct rtc_device *)dev;
+ struct platform_device *plat_dev = to_platform_device(dev);
+ struct rtc_device *rtc = platform_get_drvdata(plat_dev);
ret = request_irq(IRQ_RTC1Hz, sa1100_rtc_interrupt, IRQF_DISABLED,
"rtc 1Hz", dev);
@@ -277,19 +173,11 @@ static int sa1100_rtc_open(struct device *dev)
dev_err(dev, "IRQ %d already in use.\n", IRQ_RTCAlrm);
goto fail_ai;
}
- ret = request_irq(IRQ_OST1, timer1_interrupt, IRQF_DISABLED,
- "rtc timer", dev);
- if (ret) {
- dev_err(dev, "IRQ %d already in use.\n", IRQ_OST1);
- goto fail_pi;
- }
rtc->max_user_freq = RTC_FREQ;
- sa1100_irq_set_freq(dev, RTC_FREQ);
+ rtc_irq_set_freq(rtc, NULL, RTC_FREQ);
return 0;
- fail_pi:
- free_irq(IRQ_RTCAlrm, dev);
fail_ai:
free_irq(IRQ_RTC1Hz, dev);
fail_ui:
@@ -304,30 +192,10 @@ static void sa1100_rtc_release(struct device *dev)
OSSR = OSSR_M1;
spin_unlock_irq(&sa1100_rtc_lock);
- free_irq(IRQ_OST1, dev);
free_irq(IRQ_RTCAlrm, dev);
free_irq(IRQ_RTC1Hz, dev);
}
-
-static int sa1100_rtc_ioctl(struct device *dev, unsigned int cmd,
- unsigned long arg)
-{
- switch (cmd) {
- case RTC_UIE_OFF:
- spin_lock_irq(&sa1100_rtc_lock);
- RTSR &= ~RTSR_HZE;
- spin_unlock_irq(&sa1100_rtc_lock);
- return 0;
- case RTC_UIE_ON:
- spin_lock_irq(&sa1100_rtc_lock);
- RTSR |= RTSR_HZE;
- spin_unlock_irq(&sa1100_rtc_lock);
- return 0;
- }
- return -ENOIOCTLCMD;
-}
-
static int sa1100_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
spin_lock_irq(&sa1100_rtc_lock);
@@ -386,31 +254,20 @@ static int sa1100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
static int sa1100_rtc_proc(struct device *dev, struct seq_file *seq)
{
- struct rtc_device *rtc = (struct rtc_device *)dev;
-
- seq_printf(seq, "trim/divider\t: 0x%08x\n", (u32) RTTR);
- seq_printf(seq, "update_IRQ\t: %s\n",
- (RTSR & RTSR_HZE) ? "yes" : "no");
- seq_printf(seq, "periodic_IRQ\t: %s\n",
- (OIER & OIER_E1) ? "yes" : "no");
- seq_printf(seq, "periodic_freq\t: %d\n", rtc->irq_freq);
- seq_printf(seq, "RTSR\t\t: 0x%08x\n", (u32)RTSR);
+ seq_printf(seq, "trim/divider\t\t: 0x%08x\n", (u32) RTTR);
+ seq_printf(seq, "RTSR\t\t\t: 0x%08x\n", (u32)RTSR);
return 0;
}
static const struct rtc_class_ops sa1100_rtc_ops = {
.open = sa1100_rtc_open,
- .read_callback = sa1100_rtc_read_callback,
.release = sa1100_rtc_release,
- .ioctl = sa1100_rtc_ioctl,
.read_time = sa1100_rtc_read_time,
.set_time = sa1100_rtc_set_time,
.read_alarm = sa1100_rtc_read_alarm,
.set_alarm = sa1100_rtc_set_alarm,
.proc = sa1100_rtc_proc,
- .irq_set_freq = sa1100_irq_set_freq,
- .irq_set_state = sa1100_irq_set_state,
.alarm_irq_enable = sa1100_rtc_alarm_irq_enable,
};
@@ -418,8 +275,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
{
struct rtc_device *rtc;
- timer_freq = get_clock_tick_rate();
-
/*
* According to the manual we should be able to let RTTR be zero
* and then a default diviser for a 32.768KHz clock is used.
@@ -445,11 +300,6 @@ static int sa1100_rtc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, rtc);
- /* Set the irq_freq */
- /*TODO: Find out who is messing with this value after we initialize
- * it here.*/
- rtc->irq_freq = RTC_FREQ;
-
/* Fix for a nasty initialization problem the in SA11xx RTSR register.
* See also the comments in sa1100_rtc_interrupt().
*
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 93314a9e7fa9..e55dc1ac83ab 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -344,27 +344,6 @@ static inline void sh_rtc_setcie(struct device *dev, unsigned int enable)
spin_unlock_irq(&rtc->lock);
}
-static int sh_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
- struct sh_rtc *rtc = dev_get_drvdata(dev);
- unsigned int ret = 0;
-
- switch (cmd) {
- case RTC_UIE_OFF:
- rtc->periodic_freq &= ~PF_OXS;
- sh_rtc_setcie(dev, 0);
- break;
- case RTC_UIE_ON:
- rtc->periodic_freq |= PF_OXS;
- sh_rtc_setcie(dev, 1);
- break;
- default:
- ret = -ENOIOCTLCMD;
- }
-
- return ret;
-}
-
static int sh_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
{
sh_rtc_setaie(dev, enabled);
@@ -598,13 +577,10 @@ static int sh_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
}
static struct rtc_class_ops sh_rtc_ops = {
- .ioctl = sh_rtc_ioctl,
.read_time = sh_rtc_read_time,
.set_time = sh_rtc_set_time,
.read_alarm = sh_rtc_read_alarm,
.set_alarm = sh_rtc_set_alarm,
- .irq_set_state = sh_rtc_irq_set_state,
- .irq_set_freq = sh_rtc_irq_set_freq,
.proc = sh_rtc_proc,
.alarm_irq_enable = sh_rtc_alarm_irq_enable,
};
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 7e7d0c806f2d..572e9534b591 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -115,19 +115,6 @@ static int stmp3xxx_alarm_irq_enable(struct device *dev, unsigned int enabled)
return 0;
}
-static int stmp3xxx_update_irq_enable(struct device *dev, unsigned int enabled)
-{
- struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
-
- if (enabled)
- stmp3xxx_setl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
- rtc_data->io + HW_RTC_CTRL);
- else
- stmp3xxx_clearl(BM_RTC_CTRL_ONEMSEC_IRQ_EN,
- rtc_data->io + HW_RTC_CTRL);
- return 0;
-}
-
static int stmp3xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
{
struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
@@ -149,8 +136,6 @@ static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
static struct rtc_class_ops stmp3xxx_rtc_ops = {
.alarm_irq_enable =
stmp3xxx_alarm_irq_enable,
- .update_irq_enable =
- stmp3xxx_update_irq_enable,
.read_time = stmp3xxx_rtc_gettime,
.set_mmss = stmp3xxx_rtc_set_mmss,
.read_alarm = stmp3xxx_rtc_read_alarm,
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index a82d6fe97076..7e96254bd365 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -78,11 +78,16 @@ static ssize_t test_irq_store(struct device *dev,
struct rtc_device *rtc = platform_get_drvdata(plat_dev);
retval = count;
- if (strncmp(buf, "tick", 4) == 0)
+ if (strncmp(buf, "tick", 4) == 0 && rtc->pie_enabled)
rtc_update_irq(rtc, 1, RTC_PF | RTC_IRQF);
- else if (strncmp(buf, "alarm", 5) == 0)
- rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
- else if (strncmp(buf, "update", 6) == 0)
+ else if (strncmp(buf, "alarm", 5) == 0) {
+ struct rtc_wkalrm alrm;
+ int err = rtc_read_alarm(rtc, &alrm);
+
+ if (!err && alrm.enabled)
+ rtc_update_irq(rtc, 1, RTC_AF | RTC_IRQF);
+
+ } else if (strncmp(buf, "update", 6) == 0 && rtc->uie_rtctimer.enabled)
rtc_update_irq(rtc, 1, RTC_UF | RTC_IRQF);
else
retval = -EINVAL;
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index ed1b86828124..f9a2799c44d6 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -213,18 +213,6 @@ static int twl_rtc_alarm_irq_enable(struct device *dev, unsigned enabled)
return ret;
}
-static int twl_rtc_update_irq_enable(struct device *dev, unsigned enabled)
-{
- int ret;
-
- if (enabled)
- ret = set_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
- else
- ret = mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M);
-
- return ret;
-}
-
/*
* Gets current TWL RTC time and date parameters.
*
@@ -433,7 +421,6 @@ static struct rtc_class_ops twl_rtc_ops = {
.read_alarm = twl_rtc_read_alarm,
.set_alarm = twl_rtc_set_alarm,
.alarm_irq_enable = twl_rtc_alarm_irq_enable,
- .update_irq_enable = twl_rtc_update_irq_enable,
};
/*----------------------------------------------------------------------*/
diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index 769190ac6d11..c5698cda366a 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -207,36 +207,6 @@ static int vr41xx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
return 0;
}
-static int vr41xx_rtc_irq_set_freq(struct device *dev, int freq)
-{
- u64 count;
-
- if (!is_power_of_2(freq))
- return -EINVAL;
- count = RTC_FREQUENCY;
- do_div(count, freq);
-
- spin_lock_irq(&rtc_lock);
-
- periodic_count = count;
- rtc1_write(RTCL1LREG, periodic_count);
- rtc1_write(RTCL1HREG, periodic_count >> 16);
-
- spin_unlock_irq(&rtc_lock);
-
- return 0;
-}
-
-static int vr41xx_rtc_irq_set_state(struct device *dev, int enabled)
-{
- if (enabled)
- enable_irq(pie_irq);
- else
- disable_irq(pie_irq);
-
- return 0;
-}
-
static int vr41xx_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -308,8 +278,6 @@ static const struct rtc_class_ops vr41xx_rtc_ops = {
.set_time = vr41xx_rtc_set_time,
.read_alarm = vr41xx_rtc_read_alarm,
.set_alarm = vr41xx_rtc_set_alarm,
- .irq_set_freq = vr41xx_rtc_irq_set_freq,
- .irq_set_state = vr41xx_rtc_irq_set_state,
};
static int __devinit rtc_probe(struct platform_device *pdev)
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 82931dc65c0b..bdc909bd56da 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -315,21 +315,6 @@ static int wm831x_rtc_alarm_irq_enable(struct device *dev,
return wm831x_rtc_stop_alarm(wm831x_rtc);
}
-static int wm831x_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct wm831x_rtc *wm831x_rtc = dev_get_drvdata(dev);
- int val;
-
- if (enabled)
- val = 1 << WM831X_RTC_PINT_FREQ_SHIFT;
- else
- val = 0;
-
- return wm831x_set_bits(wm831x_rtc->wm831x, WM831X_RTC_CONTROL,
- WM831X_RTC_PINT_FREQ_MASK, val);
-}
-
static irqreturn_t wm831x_alm_irq(int irq, void *data)
{
struct wm831x_rtc *wm831x_rtc = data;
@@ -354,7 +339,6 @@ static const struct rtc_class_ops wm831x_rtc_ops = {
.read_alarm = wm831x_rtc_readalarm,
.set_alarm = wm831x_rtc_setalarm,
.alarm_irq_enable = wm831x_rtc_alarm_irq_enable,
- .update_irq_enable = wm831x_rtc_update_irq_enable,
};
#ifdef CONFIG_PM
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 3d0dc76b38af..66421426e404 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -302,26 +302,6 @@ static int wm8350_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
return ret;
}
-static int wm8350_rtc_update_irq_enable(struct device *dev,
- unsigned int enabled)
-{
- struct wm8350 *wm8350 = dev_get_drvdata(dev);
-
- /* Suppress duplicate changes since genirq nests enable and
- * disable calls. */
- if (enabled == wm8350->rtc.update_enabled)
- return 0;
-
- if (enabled)
- wm8350_unmask_irq(wm8350, WM8350_IRQ_RTC_SEC);
- else
- wm8350_mask_irq(wm8350, WM8350_IRQ_RTC_SEC);
-
- wm8350->rtc.update_enabled = enabled;
-
- return 0;
-}
-
static irqreturn_t wm8350_rtc_alarm_handler(int irq, void *data)
{
struct wm8350 *wm8350 = data;
@@ -357,7 +337,6 @@ static const struct rtc_class_ops wm8350_rtc_ops = {
.read_alarm = wm8350_rtc_readalarm,
.set_alarm = wm8350_rtc_setalarm,
.alarm_irq_enable = wm8350_rtc_alarm_irq_enable,
- .update_irq_enable = wm8350_rtc_update_irq_enable,
};
#ifdef CONFIG_PM
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 318672d05563..a9fe23d5bd0f 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -72,7 +72,7 @@ static struct dasd_discipline dasd_eckd_discipline;
static struct ccw_device_id dasd_eckd_ids[] = {
{ CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3390, 0), .driver_info = 0x1},
{ CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3390, 0), .driver_info = 0x2},
- { CCW_DEVICE_DEVTYPE (0x3880, 0, 0x3390, 0), .driver_info = 0x3},
+ { CCW_DEVICE_DEVTYPE (0x3880, 0, 0x3380, 0), .driver_info = 0x3},
{ CCW_DEVICE_DEVTYPE (0x3990, 0, 0x3380, 0), .driver_info = 0x4},
{ CCW_DEVICE_DEVTYPE (0x2105, 0, 0x3380, 0), .driver_info = 0x5},
{ CCW_DEVICE_DEVTYPE (0x9343, 0, 0x9345, 0), .driver_info = 0x6},
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index c881a14fa5dd..1f6a4d894e73 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -62,8 +62,8 @@ static int xpram_devs;
/*
* Parameter parsing functions.
*/
-static int __initdata devs = XPRAM_DEVS;
-static char __initdata *sizes[XPRAM_MAX_DEVS];
+static int devs = XPRAM_DEVS;
+static char *sizes[XPRAM_MAX_DEVS];
module_param(devs, int, 0);
module_param_array(sizes, charp, NULL, 0);
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 8cd58e412b5e..5ad44daef73b 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -460,7 +460,8 @@ kbd_ioctl(struct kbd_data *kbd, struct file *file,
unsigned int cmd, unsigned long arg)
{
void __user *argp;
- int ct, perm;
+ unsigned int ct;
+ int perm;
argp = (void __user *)arg;
diff --git a/drivers/s390/char/tape.h b/drivers/s390/char/tape.h
index 7a242f073632..267b54e8ff5a 100644
--- a/drivers/s390/char/tape.h
+++ b/drivers/s390/char/tape.h
@@ -280,6 +280,14 @@ tape_do_io_free(struct tape_device *device, struct tape_request *request)
return rc;
}
+static inline void
+tape_do_io_async_free(struct tape_device *device, struct tape_request *request)
+{
+ request->callback = (void *) tape_free_request;
+ request->callback_data = NULL;
+ tape_do_io_async(device, request);
+}
+
extern int tape_oper_handler(int irq, int status);
extern void tape_noper_handler(int irq, int status);
extern int tape_open(struct tape_device *);
diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c
index c17f35b6136a..c26511171ffe 100644
--- a/drivers/s390/char/tape_34xx.c
+++ b/drivers/s390/char/tape_34xx.c
@@ -53,23 +53,11 @@ static void tape_34xx_delete_sbid_from(struct tape_device *, int);
* Medium sense for 34xx tapes. There is no 'real' medium sense call.
* So we just do a normal sense.
*/
-static int
-tape_34xx_medium_sense(struct tape_device *device)
+static void __tape_34xx_medium_sense(struct tape_request *request)
{
- struct tape_request *request;
- unsigned char *sense;
- int rc;
-
- request = tape_alloc_request(1, 32);
- if (IS_ERR(request)) {
- DBF_EXCEPTION(6, "MSEN fail\n");
- return PTR_ERR(request);
- }
-
- request->op = TO_MSEN;
- tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
+ struct tape_device *device = request->device;
+ unsigned char *sense;
- rc = tape_do_io_interruptible(device, request);
if (request->rc == 0) {
sense = request->cpdata;
@@ -88,15 +76,47 @@ tape_34xx_medium_sense(struct tape_device *device)
device->tape_generic_status |= GMT_WR_PROT(~0);
else
device->tape_generic_status &= ~GMT_WR_PROT(~0);
- } else {
+ } else
DBF_EVENT(4, "tape_34xx: medium sense failed with rc=%d\n",
request->rc);
- }
tape_free_request(request);
+}
+
+static int tape_34xx_medium_sense(struct tape_device *device)
+{
+ struct tape_request *request;
+ int rc;
+
+ request = tape_alloc_request(1, 32);
+ if (IS_ERR(request)) {
+ DBF_EXCEPTION(6, "MSEN fail\n");
+ return PTR_ERR(request);
+ }
+ request->op = TO_MSEN;
+ tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
+ rc = tape_do_io_interruptible(device, request);
+ __tape_34xx_medium_sense(request);
return rc;
}
+static void tape_34xx_medium_sense_async(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = tape_alloc_request(1, 32);
+ if (IS_ERR(request)) {
+ DBF_EXCEPTION(6, "MSEN fail\n");
+ return;
+ }
+
+ request->op = TO_MSEN;
+ tape_ccw_end(request->cpaddr, SENSE, 32, request->cpdata);
+ request->callback = (void *) __tape_34xx_medium_sense;
+ request->callback_data = NULL;
+ tape_do_io_async(device, request);
+}
+
struct tape_34xx_work {
struct tape_device *device;
enum tape_op op;
@@ -109,6 +129,9 @@ struct tape_34xx_work {
* is inserted but cannot call tape_do_io* from an interrupt context.
* Maybe that's useful for other actions we want to start from the
* interrupt handler.
+ * Note: the work handler is called by the system work queue. The tape
+ * commands started by the handler need to be asynchronous, otherwise
+ * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
*/
static void
tape_34xx_work_handler(struct work_struct *work)
@@ -119,7 +142,7 @@ tape_34xx_work_handler(struct work_struct *work)
switch(p->op) {
case TO_MSEN:
- tape_34xx_medium_sense(device);
+ tape_34xx_medium_sense_async(device);
break;
default:
DBF_EVENT(3, "T34XX: internal error: unknown work\n");
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index fbe361fcd2c0..de2e99e0a71b 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -329,17 +329,17 @@ out:
/*
* Enable encryption
*/
-static int tape_3592_enable_crypt(struct tape_device *device)
+static struct tape_request *__tape_3592_enable_crypt(struct tape_device *device)
{
struct tape_request *request;
char *data;
DBF_EVENT(6, "tape_3592_enable_crypt\n");
if (!crypt_supported(device))
- return -ENOSYS;
+ return ERR_PTR(-ENOSYS);
request = tape_alloc_request(2, 72);
if (IS_ERR(request))
- return PTR_ERR(request);
+ return request;
data = request->cpdata;
memset(data,0,72);
@@ -354,23 +354,42 @@ static int tape_3592_enable_crypt(struct tape_device *device)
request->op = TO_CRYPT_ON;
tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
+ return request;
+}
+
+static int tape_3592_enable_crypt(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = __tape_3592_enable_crypt(device);
+ if (IS_ERR(request))
+ return PTR_ERR(request);
return tape_do_io_free(device, request);
}
+static void tape_3592_enable_crypt_async(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = __tape_3592_enable_crypt(device);
+ if (!IS_ERR(request))
+ tape_do_io_async_free(device, request);
+}
+
/*
* Disable encryption
*/
-static int tape_3592_disable_crypt(struct tape_device *device)
+static struct tape_request *__tape_3592_disable_crypt(struct tape_device *device)
{
struct tape_request *request;
char *data;
DBF_EVENT(6, "tape_3592_disable_crypt\n");
if (!crypt_supported(device))
- return -ENOSYS;
+ return ERR_PTR(-ENOSYS);
request = tape_alloc_request(2, 72);
if (IS_ERR(request))
- return PTR_ERR(request);
+ return request;
data = request->cpdata;
memset(data,0,72);
@@ -383,9 +402,28 @@ static int tape_3592_disable_crypt(struct tape_device *device)
tape_ccw_cc(request->cpaddr, MODE_SET_CB, 36, data);
tape_ccw_end(request->cpaddr + 1, MODE_SET_CB, 36, data + 36);
+ return request;
+}
+
+static int tape_3592_disable_crypt(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = __tape_3592_disable_crypt(device);
+ if (IS_ERR(request))
+ return PTR_ERR(request);
return tape_do_io_free(device, request);
}
+static void tape_3592_disable_crypt_async(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = __tape_3592_disable_crypt(device);
+ if (!IS_ERR(request))
+ tape_do_io_async_free(device, request);
+}
+
/*
* IOCTL: Set encryption status
*/
@@ -457,8 +495,7 @@ tape_3590_ioctl(struct tape_device *device, unsigned int cmd, unsigned long arg)
/*
* SENSE Medium: Get Sense data about medium state
*/
-static int
-tape_3590_sense_medium(struct tape_device *device)
+static int tape_3590_sense_medium(struct tape_device *device)
{
struct tape_request *request;
@@ -470,6 +507,18 @@ tape_3590_sense_medium(struct tape_device *device)
return tape_do_io_free(device, request);
}
+static void tape_3590_sense_medium_async(struct tape_device *device)
+{
+ struct tape_request *request;
+
+ request = tape_alloc_request(1, 128);
+ if (IS_ERR(request))
+ return;
+ request->op = TO_MSEN;
+ tape_ccw_end(request->cpaddr, MEDIUM_SENSE, 128, request->cpdata);
+ tape_do_io_async_free(device, request);
+}
+
/*
* MTTELL: Tell block. Return the number of block relative to current file.
*/
@@ -546,15 +595,14 @@ tape_3590_read_opposite(struct tape_device *device,
* 2. The attention msg is written to the "read subsystem data" buffer.
* In this case we probably should print it to the console.
*/
-static int
-tape_3590_read_attmsg(struct tape_device *device)
+static void tape_3590_read_attmsg_async(struct tape_device *device)
{
struct tape_request *request;
char *buf;
request = tape_alloc_request(3, 4096);
if (IS_ERR(request))
- return PTR_ERR(request);
+ return;
request->op = TO_READ_ATTMSG;
buf = request->cpdata;
buf[0] = PREP_RD_SS_DATA;
@@ -562,12 +610,15 @@ tape_3590_read_attmsg(struct tape_device *device)
tape_ccw_cc(request->cpaddr, PERFORM_SS_FUNC, 12, buf);
tape_ccw_cc(request->cpaddr + 1, READ_SS_DATA, 4096 - 12, buf + 12);
tape_ccw_end(request->cpaddr + 2, NOP, 0, NULL);
- return tape_do_io_free(device, request);
+ tape_do_io_async_free(device, request);
}
/*
* These functions are used to schedule follow-up actions from within an
* interrupt context (like unsolicited interrupts).
+ * Note: the work handler is called by the system work queue. The tape
+ * commands started by the handler need to be asynchronous, otherwise
+ * a deadlock can occur e.g. in case of a deferred cc=1 (see __tape_do_irq).
*/
struct work_handler_data {
struct tape_device *device;
@@ -583,16 +634,16 @@ tape_3590_work_handler(struct work_struct *work)
switch (p->op) {
case TO_MSEN:
- tape_3590_sense_medium(p->device);
+ tape_3590_sense_medium_async(p->device);
break;
case TO_READ_ATTMSG:
- tape_3590_read_attmsg(p->device);
+ tape_3590_read_attmsg_async(p->device);
break;
case TO_CRYPT_ON:
- tape_3592_enable_crypt(p->device);
+ tape_3592_enable_crypt_async(p->device);
break;
case TO_CRYPT_OFF:
- tape_3592_disable_crypt(p->device);
+ tape_3592_disable_crypt_async(p->device);
break;
default:
DBF_EVENT(3, "T3590: work handler undefined for "
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 2e9a87e8e7d8..ef6de669424b 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -165,7 +165,7 @@ scsi_mod-$(CONFIG_SCSI_NETLINK) += scsi_netlink.o
scsi_mod-$(CONFIG_SYSCTL) += scsi_sysctl.o
scsi_mod-$(CONFIG_SCSI_PROC_FS) += scsi_proc.o
scsi_mod-y += scsi_trace.o
-scsi_mod-$(CONFIG_PM_OPS) += scsi_pm.o
+scsi_mod-$(CONFIG_PM) += scsi_pm.o
scsi_tgt-y += scsi_tgt_lib.o scsi_tgt_if.o
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 79cefbe31367..638c72b7f94a 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -4277,7 +4277,7 @@ static int __devinit beiscsi_dev_probe(struct pci_dev *pcidev,
snprintf(phba->wq_name, sizeof(phba->wq_name), "beiscsi_q_irq%u",
phba->shost->host_no);
- phba->wq = create_workqueue(phba->wq_name);
+ phba->wq = alloc_workqueue(phba->wq_name, WQ_MEM_RECLAIM, 1);
if (!phba->wq) {
shost_printk(KERN_ERR, phba->shost, "beiscsi_dev_probe-"
"Failed to allocate work queue\n");
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9c5c8be72231..d841e98a8bd5 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6219,11 +6219,10 @@ static struct ata_port_operations ipr_sata_ops = {
};
static struct ata_port_info sata_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA,
- .pio_mask = 0x10, /* pio4 */
- .mwdma_mask = 0x07,
- .udma_mask = 0x7f, /* udma0-6 */
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+ .pio_mask = ATA_PIO4_ONLY,
+ .mwdma_mask = ATA_MWDMA2,
+ .udma_mask = ATA_UDMA6,
.port_ops = &ipr_sata_ops
};
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index e1a395b438ee..4d3b704ede1c 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -238,37 +238,43 @@ static bool sas_ata_qc_fill_rtf(struct ata_queued_cmd *qc)
return true;
}
-static void sas_ata_phy_reset(struct ata_port *ap)
+static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
+ unsigned long deadline)
{
+ struct ata_port *ap = link->ap;
struct domain_device *dev = ap->private_data;
struct sas_internal *i =
to_sas_internal(dev->port->ha->core.shost->transportt);
int res = TMF_RESP_FUNC_FAILED;
+ int ret = 0;
if (i->dft->lldd_I_T_nexus_reset)
res = i->dft->lldd_I_T_nexus_reset(dev);
- if (res != TMF_RESP_FUNC_COMPLETE)
+ if (res != TMF_RESP_FUNC_COMPLETE) {
SAS_DPRINTK("%s: Unable to reset I T nexus?\n", __func__);
+ ret = -EAGAIN;
+ }
switch (dev->sata_dev.command_set) {
case ATA_COMMAND_SET:
SAS_DPRINTK("%s: Found ATA device.\n", __func__);
- ap->link.device[0].class = ATA_DEV_ATA;
+ *class = ATA_DEV_ATA;
break;
case ATAPI_COMMAND_SET:
SAS_DPRINTK("%s: Found ATAPI device.\n", __func__);
- ap->link.device[0].class = ATA_DEV_ATAPI;
+ *class = ATA_DEV_ATAPI;
break;
default:
SAS_DPRINTK("%s: Unknown SATA command set: %d.\n",
__func__,
dev->sata_dev.command_set);
- ap->link.device[0].class = ATA_DEV_UNKNOWN;
+ *class = ATA_DEV_UNKNOWN;
break;
}
ap->cbl = ATA_CBL_SATA;
+ return ret;
}
static void sas_ata_post_internal(struct ata_queued_cmd *qc)
@@ -349,7 +355,11 @@ static int sas_ata_scr_read(struct ata_link *link, unsigned int sc_reg_in,
}
static struct ata_port_operations sas_sata_ops = {
- .phy_reset = sas_ata_phy_reset,
+ .prereset = ata_std_prereset,
+ .softreset = NULL,
+ .hardreset = sas_ata_hard_reset,
+ .postreset = ata_std_postreset,
+ .error_handler = ata_std_error_handler,
.post_internal_cmd = sas_ata_post_internal,
.qc_defer = ata_std_qc_defer,
.qc_prep = ata_noop_qc_prep,
@@ -362,10 +372,9 @@ static struct ata_port_operations sas_sata_ops = {
};
static struct ata_port_info sata_port_info = {
- .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_SATA_RESET |
- ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
- .pio_mask = 0x1f, /* PIO0-4 */
- .mwdma_mask = 0x07, /* MWDMA0-2 */
+ .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+ .pio_mask = ATA_PIO4,
+ .mwdma_mask = ATA_MWDMA2,
.udma_mask = ATA_UDMA6,
.port_ops = &sas_sata_ops
};
@@ -781,3 +790,68 @@ int sas_discover_sata(struct domain_device *dev)
return res;
}
+
+void sas_ata_strategy_handler(struct Scsi_Host *shost)
+{
+ struct scsi_device *sdev;
+
+ shost_for_each_device(sdev, shost) {
+ struct domain_device *ddev = sdev_to_domain_dev(sdev);
+ struct ata_port *ap = ddev->sata_dev.ap;
+
+ if (!dev_is_sata(ddev))
+ continue;
+
+ ata_port_printk(ap, KERN_DEBUG, "sas eh calling libata port error handler");
+ ata_scsi_port_error_handler(shost, ap);
+ }
+}
+
+int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
+ enum blk_eh_timer_return *rtn)
+{
+ struct domain_device *ddev = cmd_to_domain_dev(cmd);
+
+ if (!dev_is_sata(ddev) || task)
+ return 0;
+
+ /* we're a sata device with no task, so this must be a libata
+ * eh timeout. Ideally should hook into libata timeout
+ * handling, but there's no point, it just wants to activate
+ * the eh thread */
+ *rtn = BLK_EH_NOT_HANDLED;
+ return 1;
+}
+
+int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
+ struct list_head *done_q)
+{
+ int rtn = 0;
+ struct scsi_cmnd *cmd, *n;
+ struct ata_port *ap;
+
+ do {
+ LIST_HEAD(sata_q);
+
+ ap = NULL;
+
+ list_for_each_entry_safe(cmd, n, work_q, eh_entry) {
+ struct domain_device *ddev = cmd_to_domain_dev(cmd);
+
+ if (!dev_is_sata(ddev) || TO_SAS_TASK(cmd))
+ continue;
+ if(ap && ap != ddev->sata_dev.ap)
+ continue;
+ ap = ddev->sata_dev.ap;
+ rtn = 1;
+ list_move(&cmd->eh_entry, &sata_q);
+ }
+
+ if (!list_empty(&sata_q)) {
+ ata_port_printk(ap, KERN_DEBUG,"sas eh calling libata cmd error handler\n");
+ ata_scsi_cmd_error_handler(shost, ap, &sata_q);
+ }
+ } while (ap);
+
+ return rtn;
+}
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 9a7aaf5f1311..67758ea8eb7f 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -663,11 +663,16 @@ void sas_scsi_recover_host(struct Scsi_Host *shost)
* scsi_unjam_host does, but we skip scsi_eh_abort_cmds because any
* command we see here has no sas_task and is thus unknown to the HA.
*/
- if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q))
- scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
+ if (!sas_ata_eh(shost, &eh_work_q, &ha->eh_done_q))
+ if (!scsi_eh_get_sense(&eh_work_q, &ha->eh_done_q))
+ scsi_eh_ready_devs(shost, &eh_work_q, &ha->eh_done_q);
out:
+ /* now link into libata eh --- if we have any ata devices */
+ sas_ata_strategy_handler(shost);
+
scsi_eh_flush_done_q(&ha->eh_done_q);
+
SAS_DPRINTK("--- Exit %s\n", __func__);
return;
}
@@ -676,6 +681,11 @@ enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
{
struct sas_task *task = TO_SAS_TASK(cmd);
unsigned long flags;
+ enum blk_eh_timer_return rtn;
+
+ if (sas_ata_timed_out(cmd, task, &rtn))
+ return rtn;
+
if (!task) {
cmd->request->timeout /= 2;
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 44578b56ad0a..d3e58d763b43 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1561,6 +1561,7 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
{
struct Scsi_Host *host = rport_to_shost(rport);
fc_port_t *fcport = *(fc_port_t **)rport->dd_data;
+ unsigned long flags;
if (!fcport)
return;
@@ -1573,10 +1574,10 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
* Transport has effectively 'deleted' the rport, clear
* all local references.
*/
- spin_lock_irq(host->host_lock);
+ spin_lock_irqsave(host->host_lock, flags);
fcport->rport = fcport->drport = NULL;
*((fc_port_t **)rport->dd_data) = NULL;
- spin_unlock_irq(host->host_lock);
+ spin_unlock_irqrestore(host->host_lock, flags);
if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
return;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index f948e1a73aec..d9479c3fe5f8 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -2505,11 +2505,12 @@ qla2x00_rport_del(void *data)
{
fc_port_t *fcport = data;
struct fc_rport *rport;
+ unsigned long flags;
- spin_lock_irq(fcport->vha->host->host_lock);
+ spin_lock_irqsave(fcport->vha->host->host_lock, flags);
rport = fcport->drport ? fcport->drport: fcport->rport;
fcport->drport = NULL;
- spin_unlock_irq(fcport->vha->host->host_lock);
+ spin_unlock_irqrestore(fcport->vha->host->host_lock, flags);
if (rport)
fc_remote_port_delete(rport);
}
@@ -2879,6 +2880,7 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
struct fc_rport_identifiers rport_ids;
struct fc_rport *rport;
struct qla_hw_data *ha = vha->hw;
+ unsigned long flags;
qla2x00_rport_del(fcport);
@@ -2893,9 +2895,9 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
"Unable to allocate fc remote port!\n");
return;
}
- spin_lock_irq(fcport->vha->host->host_lock);
+ spin_lock_irqsave(fcport->vha->host->host_lock, flags);
*((fc_port_t **)rport->dd_data) = fcport;
- spin_unlock_irq(fcport->vha->host->host_lock);
+ spin_unlock_irqrestore(fcport->vha->host->host_lock, flags);
rport->supported_classes = fcport->supported_classes;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index c194c23ca1fb..e90f7c16b956 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -349,7 +349,7 @@ static int qla25xx_setup_mode(struct scsi_qla_host *vha)
"Can't create request queue\n");
goto fail;
}
- ha->wq = create_workqueue("qla2xxx_wq");
+ ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 1);
vha->req = ha->req_q_map[req];
options |= BIT_1;
for (ques = 1; ques < ha->max_rsp_queues; ques++) {
@@ -562,7 +562,6 @@ qla2xxx_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)
}
if (atomic_read(&fcport->state) != FCS_ONLINE) {
if (atomic_read(&fcport->state) == FCS_DEVICE_DEAD ||
- atomic_read(&fcport->state) == FCS_DEVICE_LOST ||
atomic_read(&base_vha->loop_state) == LOOP_DEAD) {
cmd->result = DID_NO_CONNECT << 16;
goto qc24_fail_command;
@@ -2513,6 +2512,7 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport,
{
struct fc_rport *rport;
scsi_qla_host_t *base_vha;
+ unsigned long flags;
if (!fcport->rport)
return;
@@ -2520,9 +2520,9 @@ qla2x00_schedule_rport_del(struct scsi_qla_host *vha, fc_port_t *fcport,
rport = fcport->rport;
if (defer) {
base_vha = pci_get_drvdata(vha->hw->pdev);
- spin_lock_irq(vha->host->host_lock);
+ spin_lock_irqsave(vha->host->host_lock, flags);
fcport->drport = rport;
- spin_unlock_irq(vha->host->host_lock);
+ spin_unlock_irqrestore(vha->host->host_lock, flags);
set_bit(FCPORT_UPDATE_NEEDED, &base_vha->dpc_flags);
qla2xxx_wake_dpc(base_vha);
} else
@@ -3282,10 +3282,10 @@ qla2x00_do_dpc(void *data)
set_user_nice(current, -20);
+ set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
DEBUG3(printk("qla2x00: DPC handler sleeping\n"));
- set_current_state(TASK_INTERRUPTIBLE);
schedule();
__set_current_state(TASK_RUNNING);
@@ -3454,7 +3454,9 @@ qla2x00_do_dpc(void *data)
qla2x00_do_dpc_all_vps(base_vha);
ha->dpc_active = 0;
+ set_current_state(TASK_INTERRUPTIBLE);
} /* End of while(1) */
+ __set_current_state(TASK_RUNNING);
DEBUG(printk("scsi(%ld): DPC handler exiting\n", base_vha->host_no));
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 7b310934efed..a6b2d72022fc 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1671,7 +1671,7 @@ static int do_device_access(struct scsi_cmnd *scmd,
unsigned long long lba, unsigned int num, int write)
{
int ret;
- unsigned int block, rest = 0;
+ unsigned long long block, rest = 0;
int (*func)(struct scsi_cmnd *, unsigned char *, int);
func = write ? fetch_to_dev_buffer : fill_from_dev_buffer;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9045c52abd25..fb2bb35c62cb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -443,7 +443,7 @@ static void scsi_run_queue(struct request_queue *q)
&sdev->request_queue->queue_flags);
if (flagset)
queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
- __blk_run_queue(sdev->request_queue);
+ __blk_run_queue(sdev->request_queue, false);
if (flagset)
queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
spin_unlock(sdev->request_queue->queue_lock);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index b4056d14f812..342ee1a9c41d 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -146,7 +146,7 @@ static inline void scsi_netlink_exit(void) {}
#endif
/* scsi_pm.c */
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
extern const struct dev_pm_ops scsi_bus_pm_ops;
#endif
#ifdef CONFIG_PM_RUNTIME
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 490ce213204e..e44ff64233fd 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -383,7 +383,7 @@ struct bus_type scsi_bus_type = {
.name = "scsi",
.match = scsi_bus_match,
.uevent = scsi_bus_uevent,
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
.pm = &scsi_bus_pm_ops,
#endif
};
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index c399be979921..f67282058ba1 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -629,7 +629,7 @@ static int __init scsi_tgt_init(void)
if (!scsi_tgt_cmd_cache)
return -ENOMEM;
- scsi_tgtd = create_workqueue("scsi_tgtd");
+ scsi_tgtd = alloc_workqueue("scsi_tgtd", 0, 1);
if (!scsi_tgtd) {
err = -ENOMEM;
goto free_kmemcache;
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 998c01be3234..5c3ccfc6b622 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3829,7 +3829,7 @@ fc_bsg_goose_queue(struct fc_rport *rport)
!test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
if (flagset)
queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
- __blk_run_queue(rport->rqst_q);
+ __blk_run_queue(rport->rqst_q, false);
if (flagset)
queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index 95928833855b..a429b01d0285 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -1557,9 +1557,7 @@ static int __devinit pxa2xx_spi_probe(struct platform_device *pdev)
drv_data->ssp = ssp;
master->dev.parent = &pdev->dev;
-#ifdef CONFIG_OF
master->dev.of_node = pdev->dev.of_node;
-#endif
/* the spi->mode bits understood by this driver: */
master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
diff --git a/drivers/spi/pxa2xx_spi_pci.c b/drivers/spi/pxa2xx_spi_pci.c
index 351d8a375b57..378e504f89eb 100644
--- a/drivers/spi/pxa2xx_spi_pci.c
+++ b/drivers/spi/pxa2xx_spi_pci.c
@@ -7,10 +7,9 @@
#include <linux/of_device.h>
#include <linux/spi/pxa2xx_spi.h>
-struct awesome_struct {
+struct ce4100_info {
struct ssp_device ssp;
- struct platform_device spi_pdev;
- struct pxa2xx_spi_master spi_pdata;
+ struct platform_device *spi_pdev;
};
static DEFINE_MUTEX(ssp_lock);
@@ -51,23 +50,15 @@ void pxa_ssp_free(struct ssp_device *ssp)
}
EXPORT_SYMBOL_GPL(pxa_ssp_free);
-static void plat_dev_release(struct device *dev)
-{
- struct awesome_struct *as = container_of(dev,
- struct awesome_struct, spi_pdev.dev);
-
- of_device_node_put(&as->spi_pdev.dev);
-}
-
static int __devinit ce4100_spi_probe(struct pci_dev *dev,
const struct pci_device_id *ent)
{
int ret;
resource_size_t phys_beg;
resource_size_t phys_len;
- struct awesome_struct *spi_info;
+ struct ce4100_info *spi_info;
struct platform_device *pdev;
- struct pxa2xx_spi_master *spi_pdata;
+ struct pxa2xx_spi_master spi_pdata;
struct ssp_device *ssp;
ret = pci_enable_device(dev);
@@ -84,33 +75,28 @@ static int __devinit ce4100_spi_probe(struct pci_dev *dev,
return ret;
}
+ pdev = platform_device_alloc("pxa2xx-spi", dev->devfn);
spi_info = kzalloc(sizeof(*spi_info), GFP_KERNEL);
- if (!spi_info) {
+ if (!pdev || !spi_info ) {
ret = -ENOMEM;
- goto err_kz;
+ goto err_nomem;
}
- ssp = &spi_info->ssp;
- pdev = &spi_info->spi_pdev;
- spi_pdata = &spi_info->spi_pdata;
+ memset(&spi_pdata, 0, sizeof(spi_pdata));
+ spi_pdata.num_chipselect = dev->devfn;
- pdev->name = "pxa2xx-spi";
- pdev->id = dev->devfn;
- pdev->dev.parent = &dev->dev;
- pdev->dev.platform_data = &spi_info->spi_pdata;
+ ret = platform_device_add_data(pdev, &spi_pdata, sizeof(spi_pdata));
+ if (ret)
+ goto err_nomem;
-#ifdef CONFIG_OF
+ pdev->dev.parent = &dev->dev;
pdev->dev.of_node = dev->dev.of_node;
-#endif
- pdev->dev.release = plat_dev_release;
-
- spi_pdata->num_chipselect = dev->devfn;
-
+ ssp = &spi_info->ssp;
ssp->phys_base = pci_resource_start(dev, 0);
ssp->mmio_base = ioremap(phys_beg, phys_len);
if (!ssp->mmio_base) {
dev_err(&pdev->dev, "failed to ioremap() registers\n");
ret = -EIO;
- goto err_remap;
+ goto err_nomem;
}
ssp->irq = dev->irq;
ssp->port_id = pdev->id;
@@ -122,7 +108,7 @@ static int __devinit ce4100_spi_probe(struct pci_dev *dev,
pci_set_drvdata(dev, spi_info);
- ret = platform_device_register(pdev);
+ ret = platform_device_add(pdev);
if (ret)
goto err_dev_add;
@@ -135,27 +121,21 @@ err_dev_add:
mutex_unlock(&ssp_lock);
iounmap(ssp->mmio_base);
-err_remap:
- kfree(spi_info);
-
-err_kz:
+err_nomem:
release_mem_region(phys_beg, phys_len);
-
+ platform_device_put(pdev);
+ kfree(spi_info);
return ret;
}
static void __devexit ce4100_spi_remove(struct pci_dev *dev)
{
- struct awesome_struct *spi_info;
- struct platform_device *pdev;
+ struct ce4100_info *spi_info;
struct ssp_device *ssp;
spi_info = pci_get_drvdata(dev);
-
ssp = &spi_info->ssp;
- pdev = &spi_info->spi_pdev;
-
- platform_device_unregister(pdev);
+ platform_device_unregister(spi_info->spi_pdev);
iounmap(ssp->mmio_base);
release_mem_region(pci_resource_start(dev, 0),
@@ -171,7 +151,6 @@ static void __devexit ce4100_spi_remove(struct pci_dev *dev)
}
static struct pci_device_id ce4100_spi_devices[] __devinitdata = {
-
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2e6a) },
{ },
};
diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c
index 7adaef62a991..4d2c75df886c 100644
--- a/drivers/spi/xilinx_spi.c
+++ b/drivers/spi/xilinx_spi.c
@@ -351,14 +351,12 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-#ifdef CONFIG_OF
static const struct of_device_id xilinx_spi_of_match[] = {
{ .compatible = "xlnx,xps-spi-2.00.a", },
{ .compatible = "xlnx,xps-spi-2.00.b", },
{}
};
MODULE_DEVICE_TABLE(of, xilinx_spi_of_match);
-#endif
struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
u32 irq, s16 bus_num, int num_cs, int little_endian, int bits_per_word)
@@ -394,9 +392,7 @@ struct spi_master *xilinx_spi_init(struct device *dev, struct resource *mem,
master->bus_num = bus_num;
master->num_chipselect = num_cs;
-#ifdef CONFIG_OF
master->dev.of_node = dev->of_node;
-#endif
xspi->mem = *mem;
xspi->irq = irq;
@@ -539,9 +535,7 @@ static struct platform_driver xilinx_spi_driver = {
.driver = {
.name = XILINX_SPI_NAME,
.owner = THIS_MODULE,
-#ifdef CONFIG_OF
.of_match_table = xilinx_spi_of_match,
-#endif
},
};
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 5cfd70819f08..973bb190ef57 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -13,8 +13,7 @@ target_core_mod-y := target_core_configfs.o \
target_core_transport.o \
target_core_cdb.o \
target_core_ua.o \
- target_core_rd.o \
- target_core_mib.o
+ target_core_rd.o
obj-$(CONFIG_TARGET_CORE) += target_core_mod.o
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 2764510798b0..caf8dc18ee0a 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -37,7 +37,6 @@
#include <linux/parser.h>
#include <linux/syscalls.h>
#include <linux/configfs.h>
-#include <linux/proc_fs.h>
#include <target/target_core_base.h>
#include <target/target_core_device.h>
@@ -1971,13 +1970,35 @@ static void target_core_dev_release(struct config_item *item)
{
struct se_subsystem_dev *se_dev = container_of(to_config_group(item),
struct se_subsystem_dev, se_dev_group);
- struct config_group *dev_cg;
-
- if (!(se_dev))
- return;
+ struct se_hba *hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
+ struct se_subsystem_api *t = hba->transport;
+ struct config_group *dev_cg = &se_dev->se_dev_group;
- dev_cg = &se_dev->se_dev_group;
kfree(dev_cg->default_groups);
+ /*
+ * This pointer will set when the storage is enabled with:
+ *`echo 1 > $CONFIGFS/core/$HBA/$DEV/dev_enable`
+ */
+ if (se_dev->se_dev_ptr) {
+ printk(KERN_INFO "Target_Core_ConfigFS: Calling se_free_"
+ "virtual_device() for se_dev_ptr: %p\n",
+ se_dev->se_dev_ptr);
+
+ se_free_virtual_device(se_dev->se_dev_ptr, hba);
+ } else {
+ /*
+ * Release struct se_subsystem_dev->se_dev_su_ptr..
+ */
+ printk(KERN_INFO "Target_Core_ConfigFS: Calling t->free_"
+ "device() for se_dev_su_ptr: %p\n",
+ se_dev->se_dev_su_ptr);
+
+ t->free_device(se_dev->se_dev_su_ptr);
+ }
+
+ printk(KERN_INFO "Target_Core_ConfigFS: Deallocating se_subsystem"
+ "_dev_t: %p\n", se_dev);
+ kfree(se_dev);
}
static ssize_t target_core_dev_show(struct config_item *item,
@@ -2140,7 +2161,16 @@ static struct configfs_attribute *target_core_alua_lu_gp_attrs[] = {
NULL,
};
+static void target_core_alua_lu_gp_release(struct config_item *item)
+{
+ struct t10_alua_lu_gp *lu_gp = container_of(to_config_group(item),
+ struct t10_alua_lu_gp, lu_gp_group);
+
+ core_alua_free_lu_gp(lu_gp);
+}
+
static struct configfs_item_operations target_core_alua_lu_gp_ops = {
+ .release = target_core_alua_lu_gp_release,
.show_attribute = target_core_alua_lu_gp_attr_show,
.store_attribute = target_core_alua_lu_gp_attr_store,
};
@@ -2191,9 +2221,11 @@ static void target_core_alua_drop_lu_gp(
printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Logical Unit"
" Group: core/alua/lu_gps/%s, ID: %hu\n",
config_item_name(item), lu_gp->lu_gp_id);
-
+ /*
+ * core_alua_free_lu_gp() is called from target_core_alua_lu_gp_ops->release()
+ * -> target_core_alua_lu_gp_release()
+ */
config_item_put(item);
- core_alua_free_lu_gp(lu_gp);
}
static struct configfs_group_operations target_core_alua_lu_gps_group_ops = {
@@ -2549,7 +2581,16 @@ static struct configfs_attribute *target_core_alua_tg_pt_gp_attrs[] = {
NULL,
};
+static void target_core_alua_tg_pt_gp_release(struct config_item *item)
+{
+ struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(to_config_group(item),
+ struct t10_alua_tg_pt_gp, tg_pt_gp_group);
+
+ core_alua_free_tg_pt_gp(tg_pt_gp);
+}
+
static struct configfs_item_operations target_core_alua_tg_pt_gp_ops = {
+ .release = target_core_alua_tg_pt_gp_release,
.show_attribute = target_core_alua_tg_pt_gp_attr_show,
.store_attribute = target_core_alua_tg_pt_gp_attr_store,
};
@@ -2602,9 +2643,11 @@ static void target_core_alua_drop_tg_pt_gp(
printk(KERN_INFO "Target_Core_ConfigFS: Releasing ALUA Target Port"
" Group: alua/tg_pt_gps/%s, ID: %hu\n",
config_item_name(item), tg_pt_gp->tg_pt_gp_id);
-
+ /*
+ * core_alua_free_tg_pt_gp() is called from target_core_alua_tg_pt_gp_ops->release()
+ * -> target_core_alua_tg_pt_gp_release().
+ */
config_item_put(item);
- core_alua_free_tg_pt_gp(tg_pt_gp);
}
static struct configfs_group_operations target_core_alua_tg_pt_gps_group_ops = {
@@ -2771,13 +2814,11 @@ static void target_core_drop_subdev(
struct se_subsystem_api *t;
struct config_item *df_item;
struct config_group *dev_cg, *tg_pt_gp_cg;
- int i, ret;
+ int i;
hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
- if (mutex_lock_interruptible(&hba->hba_access_mutex))
- goto out;
-
+ mutex_lock(&hba->hba_access_mutex);
t = hba->transport;
spin_lock(&se_global->g_device_lock);
@@ -2791,7 +2832,10 @@ static void target_core_drop_subdev(
config_item_put(df_item);
}
kfree(tg_pt_gp_cg->default_groups);
- core_alua_free_tg_pt_gp(T10_ALUA(se_dev)->default_tg_pt_gp);
+ /*
+ * core_alua_free_tg_pt_gp() is called from ->default_tg_pt_gp
+ * directly from target_core_alua_tg_pt_gp_release().
+ */
T10_ALUA(se_dev)->default_tg_pt_gp = NULL;
dev_cg = &se_dev->se_dev_group;
@@ -2800,38 +2844,12 @@ static void target_core_drop_subdev(
dev_cg->default_groups[i] = NULL;
config_item_put(df_item);
}
-
- config_item_put(item);
/*
- * This pointer will set when the storage is enabled with:
- * `echo 1 > $CONFIGFS/core/$HBA/$DEV/dev_enable`
+ * The releasing of se_dev and associated se_dev->se_dev_ptr is done
+ * from target_core_dev_item_ops->release() ->target_core_dev_release().
*/
- if (se_dev->se_dev_ptr) {
- printk(KERN_INFO "Target_Core_ConfigFS: Calling se_free_"
- "virtual_device() for se_dev_ptr: %p\n",
- se_dev->se_dev_ptr);
-
- ret = se_free_virtual_device(se_dev->se_dev_ptr, hba);
- if (ret < 0)
- goto hba_out;
- } else {
- /*
- * Release struct se_subsystem_dev->se_dev_su_ptr..
- */
- printk(KERN_INFO "Target_Core_ConfigFS: Calling t->free_"
- "device() for se_dev_su_ptr: %p\n",
- se_dev->se_dev_su_ptr);
-
- t->free_device(se_dev->se_dev_su_ptr);
- }
-
- printk(KERN_INFO "Target_Core_ConfigFS: Deallocating se_subsystem"
- "_dev_t: %p\n", se_dev);
-
-hba_out:
+ config_item_put(item);
mutex_unlock(&hba->hba_access_mutex);
-out:
- kfree(se_dev);
}
static struct configfs_group_operations target_core_hba_group_ops = {
@@ -2914,6 +2932,13 @@ SE_HBA_ATTR(hba_mode, S_IRUGO | S_IWUSR);
CONFIGFS_EATTR_OPS(target_core_hba, se_hba, hba_group);
+static void target_core_hba_release(struct config_item *item)
+{
+ struct se_hba *hba = container_of(to_config_group(item),
+ struct se_hba, hba_group);
+ core_delete_hba(hba);
+}
+
static struct configfs_attribute *target_core_hba_attrs[] = {
&target_core_hba_hba_info.attr,
&target_core_hba_hba_mode.attr,
@@ -2921,6 +2946,7 @@ static struct configfs_attribute *target_core_hba_attrs[] = {
};
static struct configfs_item_operations target_core_hba_item_ops = {
+ .release = target_core_hba_release,
.show_attribute = target_core_hba_attr_show,
.store_attribute = target_core_hba_attr_store,
};
@@ -2997,10 +3023,11 @@ static void target_core_call_delhbafromtarget(
struct config_group *group,
struct config_item *item)
{
- struct se_hba *hba = item_to_hba(item);
-
+ /*
+ * core_delete_hba() is called from target_core_hba_item_ops->release()
+ * -> target_core_hba_release()
+ */
config_item_put(item);
- core_delete_hba(hba);
}
static struct configfs_group_operations target_core_group_ops = {
@@ -3022,7 +3049,6 @@ static int target_core_init_configfs(void)
struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
struct config_group *lu_gp_cg = NULL;
struct configfs_subsystem *subsys;
- struct proc_dir_entry *scsi_target_proc = NULL;
struct t10_alua_lu_gp *lu_gp;
int ret;
@@ -3128,21 +3154,10 @@ static int target_core_init_configfs(void)
if (core_dev_setup_virtual_lun0() < 0)
goto out;
- scsi_target_proc = proc_mkdir("scsi_target", 0);
- if (!(scsi_target_proc)) {
- printk(KERN_ERR "proc_mkdir(scsi_target, 0) failed\n");
- goto out;
- }
- ret = init_scsi_target_mib();
- if (ret < 0)
- goto out;
-
return 0;
out:
configfs_unregister_subsystem(subsys);
- if (scsi_target_proc)
- remove_proc_entry("scsi_target", 0);
core_dev_release_virtual_lun0();
rd_module_exit();
out_global:
@@ -3178,8 +3193,7 @@ static void target_core_exit_configfs(void)
config_item_put(item);
}
kfree(lu_gp_cg->default_groups);
- core_alua_free_lu_gp(se_global->default_lu_gp);
- se_global->default_lu_gp = NULL;
+ lu_gp_cg->default_groups = NULL;
alua_cg = &se_global->alua_group;
for (i = 0; alua_cg->default_groups[i]; i++) {
@@ -3188,6 +3202,7 @@ static void target_core_exit_configfs(void)
config_item_put(item);
}
kfree(alua_cg->default_groups);
+ alua_cg->default_groups = NULL;
hba_cg = &se_global->target_core_hbagroup;
for (i = 0; hba_cg->default_groups[i]; i++) {
@@ -3196,20 +3211,20 @@ static void target_core_exit_configfs(void)
config_item_put(item);
}
kfree(hba_cg->default_groups);
-
- for (i = 0; subsys->su_group.default_groups[i]; i++) {
- item = &subsys->su_group.default_groups[i]->cg_item;
- subsys->su_group.default_groups[i] = NULL;
- config_item_put(item);
- }
+ hba_cg->default_groups = NULL;
+ /*
+ * We expect subsys->su_group.default_groups to be released
+ * by configfs subsystem provider logic..
+ */
+ configfs_unregister_subsystem(subsys);
kfree(subsys->su_group.default_groups);
- configfs_unregister_subsystem(subsys);
+ core_alua_free_lu_gp(se_global->default_lu_gp);
+ se_global->default_lu_gp = NULL;
+
printk(KERN_INFO "TARGET_CORE[0]: Released ConfigFS Fabric"
" Infrastructure\n");
- remove_scsi_target_mib();
- remove_proc_entry("scsi_target", 0);
core_dev_release_virtual_lun0();
rd_module_exit();
release_se_global();
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 317ce58d426d..5da051a07fa3 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -373,11 +373,11 @@ int core_update_device_list_for_node(
/*
* deve->se_lun_acl will be NULL for demo-mode created LUNs
* that have not been explictly concerted to MappedLUNs ->
- * struct se_lun_acl.
+ * struct se_lun_acl, but we remove deve->alua_port_list from
+ * port->sep_alua_list. This also means that active UAs and
+ * NodeACL context specific PR metadata for demo-mode
+ * MappedLUN *deve will be released below..
*/
- if (!(deve->se_lun_acl))
- return 0;
-
spin_lock_bh(&port->sep_alua_lock);
list_del(&deve->alua_port_list);
spin_unlock_bh(&port->sep_alua_lock);
@@ -395,12 +395,14 @@ int core_update_device_list_for_node(
printk(KERN_ERR "struct se_dev_entry->se_lun_acl"
" already set for demo mode -> explict"
" LUN ACL transition\n");
+ spin_unlock_irq(&nacl->device_list_lock);
return -1;
}
if (deve->se_lun != lun) {
printk(KERN_ERR "struct se_dev_entry->se_lun does"
" match passed struct se_lun for demo mode"
" -> explict LUN ACL transition\n");
+ spin_unlock_irq(&nacl->device_list_lock);
return -1;
}
deve->se_lun_acl = lun_acl;
@@ -865,9 +867,6 @@ static void se_dev_stop(struct se_device *dev)
}
}
spin_unlock(&hba->device_lock);
-
- while (atomic_read(&hba->dev_mib_access_count))
- cpu_relax();
}
int se_dev_check_online(struct se_device *dev)
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 32b148d7e261..b65d1c8e7740 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -214,12 +214,22 @@ TCM_MAPPEDLUN_ATTR(write_protect, S_IRUGO | S_IWUSR);
CONFIGFS_EATTR_OPS(target_fabric_mappedlun, se_lun_acl, se_lun_group);
+static void target_fabric_mappedlun_release(struct config_item *item)
+{
+ struct se_lun_acl *lacl = container_of(to_config_group(item),
+ struct se_lun_acl, se_lun_group);
+ struct se_portal_group *se_tpg = lacl->se_lun_nacl->se_tpg;
+
+ core_dev_free_initiator_node_lun_acl(se_tpg, lacl);
+}
+
static struct configfs_attribute *target_fabric_mappedlun_attrs[] = {
&target_fabric_mappedlun_write_protect.attr,
NULL,
};
static struct configfs_item_operations target_fabric_mappedlun_item_ops = {
+ .release = target_fabric_mappedlun_release,
.show_attribute = target_fabric_mappedlun_attr_show,
.store_attribute = target_fabric_mappedlun_attr_store,
.allow_link = target_fabric_mappedlun_link,
@@ -337,15 +347,21 @@ static void target_fabric_drop_mappedlun(
struct config_group *group,
struct config_item *item)
{
- struct se_lun_acl *lacl = container_of(to_config_group(item),
- struct se_lun_acl, se_lun_group);
- struct se_portal_group *se_tpg = lacl->se_lun_nacl->se_tpg;
-
config_item_put(item);
- core_dev_free_initiator_node_lun_acl(se_tpg, lacl);
+}
+
+static void target_fabric_nacl_base_release(struct config_item *item)
+{
+ struct se_node_acl *se_nacl = container_of(to_config_group(item),
+ struct se_node_acl, acl_group);
+ struct se_portal_group *se_tpg = se_nacl->se_tpg;
+ struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
+
+ tf->tf_ops.fabric_drop_nodeacl(se_nacl);
}
static struct configfs_item_operations target_fabric_nacl_base_item_ops = {
+ .release = target_fabric_nacl_base_release,
.show_attribute = target_fabric_nacl_base_attr_show,
.store_attribute = target_fabric_nacl_base_attr_store,
};
@@ -404,9 +420,6 @@ static void target_fabric_drop_nodeacl(
struct config_group *group,
struct config_item *item)
{
- struct se_portal_group *se_tpg = container_of(group,
- struct se_portal_group, tpg_acl_group);
- struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
struct se_node_acl *se_nacl = container_of(to_config_group(item),
struct se_node_acl, acl_group);
struct config_item *df_item;
@@ -419,9 +432,10 @@ static void target_fabric_drop_nodeacl(
nacl_cg->default_groups[i] = NULL;
config_item_put(df_item);
}
-
+ /*
+ * struct se_node_acl free is done in target_fabric_nacl_base_release()
+ */
config_item_put(item);
- tf->tf_ops.fabric_drop_nodeacl(se_nacl);
}
static struct configfs_group_operations target_fabric_nacl_group_ops = {
@@ -437,7 +451,18 @@ TF_CIT_SETUP(tpg_nacl, NULL, &target_fabric_nacl_group_ops, NULL);
CONFIGFS_EATTR_OPS(target_fabric_np_base, se_tpg_np, tpg_np_group);
+static void target_fabric_np_base_release(struct config_item *item)
+{
+ struct se_tpg_np *se_tpg_np = container_of(to_config_group(item),
+ struct se_tpg_np, tpg_np_group);
+ struct se_portal_group *se_tpg = se_tpg_np->tpg_np_parent;
+ struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
+
+ tf->tf_ops.fabric_drop_np(se_tpg_np);
+}
+
static struct configfs_item_operations target_fabric_np_base_item_ops = {
+ .release = target_fabric_np_base_release,
.show_attribute = target_fabric_np_base_attr_show,
.store_attribute = target_fabric_np_base_attr_store,
};
@@ -466,6 +491,7 @@ static struct config_group *target_fabric_make_np(
if (!(se_tpg_np) || IS_ERR(se_tpg_np))
return ERR_PTR(-EINVAL);
+ se_tpg_np->tpg_np_parent = se_tpg;
config_group_init_type_name(&se_tpg_np->tpg_np_group, name,
&TF_CIT_TMPL(tf)->tfc_tpg_np_base_cit);
@@ -476,14 +502,10 @@ static void target_fabric_drop_np(
struct config_group *group,
struct config_item *item)
{
- struct se_portal_group *se_tpg = container_of(group,
- struct se_portal_group, tpg_np_group);
- struct target_fabric_configfs *tf = se_tpg->se_tpg_wwn->wwn_tf;
- struct se_tpg_np *se_tpg_np = container_of(to_config_group(item),
- struct se_tpg_np, tpg_np_group);
-
+ /*
+ * struct se_tpg_np is released via target_fabric_np_base_release()
+ */
config_item_put(item);
- tf->tf_ops.fabric_drop_np(se_tpg_np);
}
static struct configfs_group_operations target_fabric_np_group_ops = {
@@ -814,7 +836,18 @@ TF_CIT_SETUP(tpg_param, &target_fabric_tpg_param_item_ops, NULL, NULL);
*/
CONFIGFS_EATTR_OPS(target_fabric_tpg, se_portal_group, tpg_group);
+static void target_fabric_tpg_release(struct config_item *item)
+{
+ struct se_portal_group *se_tpg = container_of(to_config_group(item),
+ struct se_portal_group, tpg_group);
+ struct se_wwn *wwn = se_tpg->se_tpg_wwn;
+ struct target_fabric_configfs *tf = wwn->wwn_tf;
+
+ tf->tf_ops.fabric_drop_tpg(se_tpg);
+}
+
static struct configfs_item_operations target_fabric_tpg_base_item_ops = {
+ .release = target_fabric_tpg_release,
.show_attribute = target_fabric_tpg_attr_show,
.store_attribute = target_fabric_tpg_attr_store,
};
@@ -872,8 +905,6 @@ static void target_fabric_drop_tpg(
struct config_group *group,
struct config_item *item)
{
- struct se_wwn *wwn = container_of(group, struct se_wwn, wwn_group);
- struct target_fabric_configfs *tf = wwn->wwn_tf;
struct se_portal_group *se_tpg = container_of(to_config_group(item),
struct se_portal_group, tpg_group);
struct config_group *tpg_cg = &se_tpg->tpg_group;
@@ -890,15 +921,28 @@ static void target_fabric_drop_tpg(
}
config_item_put(item);
- tf->tf_ops.fabric_drop_tpg(se_tpg);
}
+static void target_fabric_release_wwn(struct config_item *item)
+{
+ struct se_wwn *wwn = container_of(to_config_group(item),
+ struct se_wwn, wwn_group);
+ struct target_fabric_configfs *tf = wwn->wwn_tf;
+
+ tf->tf_ops.fabric_drop_wwn(wwn);
+}
+
+static struct configfs_item_operations target_fabric_tpg_item_ops = {
+ .release = target_fabric_release_wwn,
+};
+
static struct configfs_group_operations target_fabric_tpg_group_ops = {
.make_group = target_fabric_make_tpg,
.drop_item = target_fabric_drop_tpg,
};
-TF_CIT_SETUP(tpg, NULL, &target_fabric_tpg_group_ops, NULL);
+TF_CIT_SETUP(tpg, &target_fabric_tpg_item_ops, &target_fabric_tpg_group_ops,
+ NULL);
/* End of tfc_tpg_cit */
@@ -932,13 +976,7 @@ static void target_fabric_drop_wwn(
struct config_group *group,
struct config_item *item)
{
- struct target_fabric_configfs *tf = container_of(group,
- struct target_fabric_configfs, tf_group);
- struct se_wwn *wwn = container_of(to_config_group(item),
- struct se_wwn, wwn_group);
-
config_item_put(item);
- tf->tf_ops.fabric_drop_wwn(wwn);
}
static struct configfs_group_operations target_fabric_wwn_group_ops = {
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c6e0d757e76e..67f0c09983c8 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -154,7 +154,7 @@ static struct se_device *iblock_create_virtdevice(
bd = blkdev_get_by_path(ib_dev->ibd_udev_path,
FMODE_WRITE|FMODE_READ|FMODE_EXCL, ib_dev);
- if (!(bd))
+ if (IS_ERR(bd))
goto failed;
/*
* Setup the local scope queue_limits from struct request_queue->limits
@@ -220,8 +220,10 @@ static void iblock_free_device(void *p)
{
struct iblock_dev *ib_dev = p;
- blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
- bioset_free(ib_dev->ibd_bio_set);
+ if (ib_dev->ibd_bd != NULL)
+ blkdev_put(ib_dev->ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL);
+ if (ib_dev->ibd_bio_set != NULL)
+ bioset_free(ib_dev->ibd_bio_set);
kfree(ib_dev);
}
diff --git a/drivers/target/target_core_mib.c b/drivers/target/target_core_mib.c
deleted file mode 100644
index d5a48aa0d2d1..000000000000
--- a/drivers/target/target_core_mib.c
+++ /dev/null
@@ -1,1078 +0,0 @@
-/*******************************************************************************
- * Filename: target_core_mib.c
- *
- * Copyright (c) 2006-2007 SBE, Inc. All Rights Reserved.
- * Copyright (c) 2007-2010 Rising Tide Systems
- * Copyright (c) 2008-2010 Linux-iSCSI.org
- *
- * Nicholas A. Bellinger <nab@linux-iscsi.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- ******************************************************************************/
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/version.h>
-#include <generated/utsrelease.h>
-#include <linux/utsname.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/blkdev.h>
-#include <scsi/scsi.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_host.h>
-
-#include <target/target_core_base.h>
-#include <target/target_core_transport.h>
-#include <target/target_core_fabric_ops.h>
-#include <target/target_core_configfs.h>
-
-#include "target_core_hba.h"
-#include "target_core_mib.h"
-
-/* SCSI mib table index */
-static struct scsi_index_table scsi_index_table;
-
-#ifndef INITIAL_JIFFIES
-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
-#endif
-
-/* SCSI Instance Table */
-#define SCSI_INST_SW_INDEX 1
-#define SCSI_TRANSPORT_INDEX 1
-
-#define NONE "None"
-#define ISPRINT(a) ((a >= ' ') && (a <= '~'))
-
-static inline int list_is_first(const struct list_head *list,
- const struct list_head *head)
-{
- return list->prev == head;
-}
-
-static void *locate_hba_start(
- struct seq_file *seq,
- loff_t *pos)
-{
- spin_lock(&se_global->g_device_lock);
- return seq_list_start(&se_global->g_se_dev_list, *pos);
-}
-
-static void *locate_hba_next(
- struct seq_file *seq,
- void *v,
- loff_t *pos)
-{
- return seq_list_next(v, &se_global->g_se_dev_list, pos);
-}
-
-static void locate_hba_stop(struct seq_file *seq, void *v)
-{
- spin_unlock(&se_global->g_device_lock);
-}
-
-/****************************************************************************
- * SCSI MIB Tables
- ****************************************************************************/
-
-/*
- * SCSI Instance Table
- */
-static void *scsi_inst_seq_start(
- struct seq_file *seq,
- loff_t *pos)
-{
- spin_lock(&se_global->hba_lock);
- return seq_list_start(&se_global->g_hba_list, *pos);
-}
-
-static void *scsi_inst_seq_next(
- struct seq_file *seq,
- void *v,
- loff_t *pos)
-{
- return seq_list_next(v, &se_global->g_hba_list, pos);
-}
-
-static void scsi_inst_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock(&se_global->hba_lock);
-}
-
-static int scsi_inst_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba = list_entry(v, struct se_hba, hba_list);
-
- if (list_is_first(&hba->hba_list, &se_global->g_hba_list))
- seq_puts(seq, "inst sw_indx\n");
-
- seq_printf(seq, "%u %u\n", hba->hba_index, SCSI_INST_SW_INDEX);
- seq_printf(seq, "plugin: %s version: %s\n",
- hba->transport->name, TARGET_CORE_VERSION);
-
- return 0;
-}
-
-static const struct seq_operations scsi_inst_seq_ops = {
- .start = scsi_inst_seq_start,
- .next = scsi_inst_seq_next,
- .stop = scsi_inst_seq_stop,
- .show = scsi_inst_seq_show
-};
-
-static int scsi_inst_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_inst_seq_ops);
-}
-
-static const struct file_operations scsi_inst_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_inst_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Device Table
- */
-static void *scsi_dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_dev_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-static int scsi_dev_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- char str[28];
- int k;
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst indx role ports\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- seq_printf(seq, "%u %u %s %u\n", hba->hba_index,
- dev->dev_index, "Target", dev->dev_port_count);
-
- memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
-
- /* vendor */
- for (k = 0; k < 8; k++)
- str[k] = ISPRINT(DEV_T10_WWN(dev)->vendor[k]) ?
- DEV_T10_WWN(dev)->vendor[k] : 0x20;
- str[k] = 0x20;
-
- /* model */
- for (k = 0; k < 16; k++)
- str[k+9] = ISPRINT(DEV_T10_WWN(dev)->model[k]) ?
- DEV_T10_WWN(dev)->model[k] : 0x20;
- str[k + 9] = 0;
-
- seq_printf(seq, "dev_alias: %s\n", str);
-
- return 0;
-}
-
-static const struct seq_operations scsi_dev_seq_ops = {
- .start = scsi_dev_seq_start,
- .next = scsi_dev_seq_next,
- .stop = scsi_dev_seq_stop,
- .show = scsi_dev_seq_show
-};
-
-static int scsi_dev_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_dev_seq_ops);
-}
-
-static const struct file_operations scsi_dev_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Port Table
- */
-static void *scsi_port_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_port_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_port_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-static int scsi_port_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- struct se_port *sep, *sep_tmp;
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst device indx role busy_count\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- /* FIXME: scsiPortBusyStatuses count */
- spin_lock(&dev->se_port_lock);
- list_for_each_entry_safe(sep, sep_tmp, &dev->dev_sep_list, sep_list) {
- seq_printf(seq, "%u %u %u %s%u %u\n", hba->hba_index,
- dev->dev_index, sep->sep_index, "Device",
- dev->dev_index, 0);
- }
- spin_unlock(&dev->se_port_lock);
-
- return 0;
-}
-
-static const struct seq_operations scsi_port_seq_ops = {
- .start = scsi_port_seq_start,
- .next = scsi_port_seq_next,
- .stop = scsi_port_seq_stop,
- .show = scsi_port_seq_show
-};
-
-static int scsi_port_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_port_seq_ops);
-}
-
-static const struct file_operations scsi_port_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_port_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Transport Table
- */
-static void *scsi_transport_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_transport_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-static int scsi_transport_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- struct se_port *se, *se_tmp;
- struct se_portal_group *tpg;
- struct t10_wwn *wwn;
- char buf[64];
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst device indx dev_name\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- wwn = DEV_T10_WWN(dev);
-
- spin_lock(&dev->se_port_lock);
- list_for_each_entry_safe(se, se_tmp, &dev->dev_sep_list, sep_list) {
- tpg = se->sep_tpg;
- sprintf(buf, "scsiTransport%s",
- TPG_TFO(tpg)->get_fabric_name());
-
- seq_printf(seq, "%u %s %u %s+%s\n",
- hba->hba_index, /* scsiTransportIndex */
- buf, /* scsiTransportType */
- (TPG_TFO(tpg)->tpg_get_inst_index != NULL) ?
- TPG_TFO(tpg)->tpg_get_inst_index(tpg) :
- 0,
- TPG_TFO(tpg)->tpg_get_wwn(tpg),
- (strlen(wwn->unit_serial)) ?
- /* scsiTransportDevName */
- wwn->unit_serial : wwn->vendor);
- }
- spin_unlock(&dev->se_port_lock);
-
- return 0;
-}
-
-static const struct seq_operations scsi_transport_seq_ops = {
- .start = scsi_transport_seq_start,
- .next = scsi_transport_seq_next,
- .stop = scsi_transport_seq_stop,
- .show = scsi_transport_seq_show
-};
-
-static int scsi_transport_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_transport_seq_ops);
-}
-
-static const struct file_operations scsi_transport_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_transport_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Target Device Table
- */
-static void *scsi_tgt_dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_tgt_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_tgt_dev_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-
-#define LU_COUNT 1 /* for now */
-static int scsi_tgt_dev_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- int non_accessible_lus = 0;
- char status[16];
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst indx num_LUs status non_access_LUs"
- " resets\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- switch (dev->dev_status) {
- case TRANSPORT_DEVICE_ACTIVATED:
- strcpy(status, "activated");
- break;
- case TRANSPORT_DEVICE_DEACTIVATED:
- strcpy(status, "deactivated");
- non_accessible_lus = 1;
- break;
- case TRANSPORT_DEVICE_SHUTDOWN:
- strcpy(status, "shutdown");
- non_accessible_lus = 1;
- break;
- case TRANSPORT_DEVICE_OFFLINE_ACTIVATED:
- case TRANSPORT_DEVICE_OFFLINE_DEACTIVATED:
- strcpy(status, "offline");
- non_accessible_lus = 1;
- break;
- default:
- sprintf(status, "unknown(%d)", dev->dev_status);
- non_accessible_lus = 1;
- }
-
- seq_printf(seq, "%u %u %u %s %u %u\n",
- hba->hba_index, dev->dev_index, LU_COUNT,
- status, non_accessible_lus, dev->num_resets);
-
- return 0;
-}
-
-static const struct seq_operations scsi_tgt_dev_seq_ops = {
- .start = scsi_tgt_dev_seq_start,
- .next = scsi_tgt_dev_seq_next,
- .stop = scsi_tgt_dev_seq_stop,
- .show = scsi_tgt_dev_seq_show
-};
-
-static int scsi_tgt_dev_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_tgt_dev_seq_ops);
-}
-
-static const struct file_operations scsi_tgt_dev_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_tgt_dev_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Target Port Table
- */
-static void *scsi_tgt_port_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_tgt_port_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_tgt_port_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-static int scsi_tgt_port_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- struct se_port *sep, *sep_tmp;
- struct se_portal_group *tpg;
- u32 rx_mbytes, tx_mbytes;
- unsigned long long num_cmds;
- char buf[64];
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst device indx name port_index in_cmds"
- " write_mbytes read_mbytes hs_in_cmds\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- spin_lock(&dev->se_port_lock);
- list_for_each_entry_safe(sep, sep_tmp, &dev->dev_sep_list, sep_list) {
- tpg = sep->sep_tpg;
- sprintf(buf, "%sPort#",
- TPG_TFO(tpg)->get_fabric_name());
-
- seq_printf(seq, "%u %u %u %s%d %s%s%d ",
- hba->hba_index,
- dev->dev_index,
- sep->sep_index,
- buf, sep->sep_index,
- TPG_TFO(tpg)->tpg_get_wwn(tpg), "+t+",
- TPG_TFO(tpg)->tpg_get_tag(tpg));
-
- spin_lock(&sep->sep_lun->lun_sep_lock);
- num_cmds = sep->sep_stats.cmd_pdus;
- rx_mbytes = (sep->sep_stats.rx_data_octets >> 20);
- tx_mbytes = (sep->sep_stats.tx_data_octets >> 20);
- spin_unlock(&sep->sep_lun->lun_sep_lock);
-
- seq_printf(seq, "%llu %u %u %u\n", num_cmds,
- rx_mbytes, tx_mbytes, 0);
- }
- spin_unlock(&dev->se_port_lock);
-
- return 0;
-}
-
-static const struct seq_operations scsi_tgt_port_seq_ops = {
- .start = scsi_tgt_port_seq_start,
- .next = scsi_tgt_port_seq_next,
- .stop = scsi_tgt_port_seq_stop,
- .show = scsi_tgt_port_seq_show
-};
-
-static int scsi_tgt_port_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_tgt_port_seq_ops);
-}
-
-static const struct file_operations scsi_tgt_port_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_tgt_port_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Authorized Initiator Table:
- * It contains the SCSI Initiators authorized to be attached to one of the
- * local Target ports.
- * Iterates through all active TPGs and extracts the info from the ACLs
- */
-static void *scsi_auth_intr_seq_start(struct seq_file *seq, loff_t *pos)
-{
- spin_lock_bh(&se_global->se_tpg_lock);
- return seq_list_start(&se_global->g_se_tpg_list, *pos);
-}
-
-static void *scsi_auth_intr_seq_next(struct seq_file *seq, void *v,
- loff_t *pos)
-{
- return seq_list_next(v, &se_global->g_se_tpg_list, pos);
-}
-
-static void scsi_auth_intr_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_bh(&se_global->se_tpg_lock);
-}
-
-static int scsi_auth_intr_seq_show(struct seq_file *seq, void *v)
-{
- struct se_portal_group *se_tpg = list_entry(v, struct se_portal_group,
- se_tpg_list);
- struct se_dev_entry *deve;
- struct se_lun *lun;
- struct se_node_acl *se_nacl;
- int j;
-
- if (list_is_first(&se_tpg->se_tpg_list,
- &se_global->g_se_tpg_list))
- seq_puts(seq, "inst dev port indx dev_or_port intr_name "
- "map_indx att_count num_cmds read_mbytes "
- "write_mbytes hs_num_cmds creation_time row_status\n");
-
- if (!(se_tpg))
- return 0;
-
- spin_lock(&se_tpg->acl_node_lock);
- list_for_each_entry(se_nacl, &se_tpg->acl_node_list, acl_list) {
-
- atomic_inc(&se_nacl->mib_ref_count);
- smp_mb__after_atomic_inc();
- spin_unlock(&se_tpg->acl_node_lock);
-
- spin_lock_irq(&se_nacl->device_list_lock);
- for (j = 0; j < TRANSPORT_MAX_LUNS_PER_TPG; j++) {
- deve = &se_nacl->device_list[j];
- if (!(deve->lun_flags &
- TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) ||
- (!deve->se_lun))
- continue;
- lun = deve->se_lun;
- if (!lun->lun_se_dev)
- continue;
-
- seq_printf(seq, "%u %u %u %u %u %s %u %u %u %u %u %u"
- " %u %s\n",
- /* scsiInstIndex */
- (TPG_TFO(se_tpg)->tpg_get_inst_index != NULL) ?
- TPG_TFO(se_tpg)->tpg_get_inst_index(se_tpg) :
- 0,
- /* scsiDeviceIndex */
- lun->lun_se_dev->dev_index,
- /* scsiAuthIntrTgtPortIndex */
- TPG_TFO(se_tpg)->tpg_get_tag(se_tpg),
- /* scsiAuthIntrIndex */
- se_nacl->acl_index,
- /* scsiAuthIntrDevOrPort */
- 1,
- /* scsiAuthIntrName */
- se_nacl->initiatorname[0] ?
- se_nacl->initiatorname : NONE,
- /* FIXME: scsiAuthIntrLunMapIndex */
- 0,
- /* scsiAuthIntrAttachedTimes */
- deve->attach_count,
- /* scsiAuthIntrOutCommands */
- deve->total_cmds,
- /* scsiAuthIntrReadMegaBytes */
- (u32)(deve->read_bytes >> 20),
- /* scsiAuthIntrWrittenMegaBytes */
- (u32)(deve->write_bytes >> 20),
- /* FIXME: scsiAuthIntrHSOutCommands */
- 0,
- /* scsiAuthIntrLastCreation */
- (u32)(((u32)deve->creation_time -
- INITIAL_JIFFIES) * 100 / HZ),
- /* FIXME: scsiAuthIntrRowStatus */
- "Ready");
- }
- spin_unlock_irq(&se_nacl->device_list_lock);
-
- spin_lock(&se_tpg->acl_node_lock);
- atomic_dec(&se_nacl->mib_ref_count);
- smp_mb__after_atomic_dec();
- }
- spin_unlock(&se_tpg->acl_node_lock);
-
- return 0;
-}
-
-static const struct seq_operations scsi_auth_intr_seq_ops = {
- .start = scsi_auth_intr_seq_start,
- .next = scsi_auth_intr_seq_next,
- .stop = scsi_auth_intr_seq_stop,
- .show = scsi_auth_intr_seq_show
-};
-
-static int scsi_auth_intr_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_auth_intr_seq_ops);
-}
-
-static const struct file_operations scsi_auth_intr_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_auth_intr_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Attached Initiator Port Table:
- * It lists the SCSI Initiators attached to one of the local Target ports.
- * Iterates through all active TPGs and use active sessions from each TPG
- * to list the info fo this table.
- */
-static void *scsi_att_intr_port_seq_start(struct seq_file *seq, loff_t *pos)
-{
- spin_lock_bh(&se_global->se_tpg_lock);
- return seq_list_start(&se_global->g_se_tpg_list, *pos);
-}
-
-static void *scsi_att_intr_port_seq_next(struct seq_file *seq, void *v,
- loff_t *pos)
-{
- return seq_list_next(v, &se_global->g_se_tpg_list, pos);
-}
-
-static void scsi_att_intr_port_seq_stop(struct seq_file *seq, void *v)
-{
- spin_unlock_bh(&se_global->se_tpg_lock);
-}
-
-static int scsi_att_intr_port_seq_show(struct seq_file *seq, void *v)
-{
- struct se_portal_group *se_tpg = list_entry(v, struct se_portal_group,
- se_tpg_list);
- struct se_dev_entry *deve;
- struct se_lun *lun;
- struct se_node_acl *se_nacl;
- struct se_session *se_sess;
- unsigned char buf[64];
- int j;
-
- if (list_is_first(&se_tpg->se_tpg_list,
- &se_global->g_se_tpg_list))
- seq_puts(seq, "inst dev port indx port_auth_indx port_name"
- " port_ident\n");
-
- if (!(se_tpg))
- return 0;
-
- spin_lock(&se_tpg->session_lock);
- list_for_each_entry(se_sess, &se_tpg->tpg_sess_list, sess_list) {
- if ((TPG_TFO(se_tpg)->sess_logged_in(se_sess)) ||
- (!se_sess->se_node_acl) ||
- (!se_sess->se_node_acl->device_list))
- continue;
-
- atomic_inc(&se_sess->mib_ref_count);
- smp_mb__after_atomic_inc();
- se_nacl = se_sess->se_node_acl;
- atomic_inc(&se_nacl->mib_ref_count);
- smp_mb__after_atomic_inc();
- spin_unlock(&se_tpg->session_lock);
-
- spin_lock_irq(&se_nacl->device_list_lock);
- for (j = 0; j < TRANSPORT_MAX_LUNS_PER_TPG; j++) {
- deve = &se_nacl->device_list[j];
- if (!(deve->lun_flags &
- TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) ||
- (!deve->se_lun))
- continue;
-
- lun = deve->se_lun;
- if (!lun->lun_se_dev)
- continue;
-
- memset(buf, 0, 64);
- if (TPG_TFO(se_tpg)->sess_get_initiator_sid != NULL)
- TPG_TFO(se_tpg)->sess_get_initiator_sid(
- se_sess, (unsigned char *)&buf[0], 64);
-
- seq_printf(seq, "%u %u %u %u %u %s+i+%s\n",
- /* scsiInstIndex */
- (TPG_TFO(se_tpg)->tpg_get_inst_index != NULL) ?
- TPG_TFO(se_tpg)->tpg_get_inst_index(se_tpg) :
- 0,
- /* scsiDeviceIndex */
- lun->lun_se_dev->dev_index,
- /* scsiPortIndex */
- TPG_TFO(se_tpg)->tpg_get_tag(se_tpg),
- /* scsiAttIntrPortIndex */
- (TPG_TFO(se_tpg)->sess_get_index != NULL) ?
- TPG_TFO(se_tpg)->sess_get_index(se_sess) :
- 0,
- /* scsiAttIntrPortAuthIntrIdx */
- se_nacl->acl_index,
- /* scsiAttIntrPortName */
- se_nacl->initiatorname[0] ?
- se_nacl->initiatorname : NONE,
- /* scsiAttIntrPortIdentifier */
- buf);
- }
- spin_unlock_irq(&se_nacl->device_list_lock);
-
- spin_lock(&se_tpg->session_lock);
- atomic_dec(&se_nacl->mib_ref_count);
- smp_mb__after_atomic_dec();
- atomic_dec(&se_sess->mib_ref_count);
- smp_mb__after_atomic_dec();
- }
- spin_unlock(&se_tpg->session_lock);
-
- return 0;
-}
-
-static const struct seq_operations scsi_att_intr_port_seq_ops = {
- .start = scsi_att_intr_port_seq_start,
- .next = scsi_att_intr_port_seq_next,
- .stop = scsi_att_intr_port_seq_stop,
- .show = scsi_att_intr_port_seq_show
-};
-
-static int scsi_att_intr_port_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_att_intr_port_seq_ops);
-}
-
-static const struct file_operations scsi_att_intr_port_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_att_intr_port_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/*
- * SCSI Logical Unit Table
- */
-static void *scsi_lu_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return locate_hba_start(seq, pos);
-}
-
-static void *scsi_lu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- return locate_hba_next(seq, v, pos);
-}
-
-static void scsi_lu_seq_stop(struct seq_file *seq, void *v)
-{
- locate_hba_stop(seq, v);
-}
-
-#define SCSI_LU_INDEX 1
-static int scsi_lu_seq_show(struct seq_file *seq, void *v)
-{
- struct se_hba *hba;
- struct se_subsystem_dev *se_dev = list_entry(v, struct se_subsystem_dev,
- g_se_dev_list);
- struct se_device *dev = se_dev->se_dev_ptr;
- int j;
- char str[28];
-
- if (list_is_first(&se_dev->g_se_dev_list, &se_global->g_se_dev_list))
- seq_puts(seq, "inst dev indx LUN lu_name vend prod rev"
- " dev_type status state-bit num_cmds read_mbytes"
- " write_mbytes resets full_stat hs_num_cmds creation_time\n");
-
- if (!(dev))
- return 0;
-
- hba = dev->se_hba;
- if (!(hba)) {
- /* Log error ? */
- return 0;
- }
-
- /* Fix LU state, if we can read it from the device */
- seq_printf(seq, "%u %u %u %llu %s", hba->hba_index,
- dev->dev_index, SCSI_LU_INDEX,
- (unsigned long long)0, /* FIXME: scsiLuDefaultLun */
- (strlen(DEV_T10_WWN(dev)->unit_serial)) ?
- /* scsiLuWwnName */
- (char *)&DEV_T10_WWN(dev)->unit_serial[0] :
- "None");
-
- memcpy(&str[0], (void *)DEV_T10_WWN(dev), 28);
- /* scsiLuVendorId */
- for (j = 0; j < 8; j++)
- str[j] = ISPRINT(DEV_T10_WWN(dev)->vendor[j]) ?
- DEV_T10_WWN(dev)->vendor[j] : 0x20;
- str[8] = 0;
- seq_printf(seq, " %s", str);
-
- /* scsiLuProductId */
- for (j = 0; j < 16; j++)
- str[j] = ISPRINT(DEV_T10_WWN(dev)->model[j]) ?
- DEV_T10_WWN(dev)->model[j] : 0x20;
- str[16] = 0;
- seq_printf(seq, " %s", str);
-
- /* scsiLuRevisionId */
- for (j = 0; j < 4; j++)
- str[j] = ISPRINT(DEV_T10_WWN(dev)->revision[j]) ?
- DEV_T10_WWN(dev)->revision[j] : 0x20;
- str[4] = 0;
- seq_printf(seq, " %s", str);
-
- seq_printf(seq, " %u %s %s %llu %u %u %u %u %u %u\n",
- /* scsiLuPeripheralType */
- TRANSPORT(dev)->get_device_type(dev),
- (dev->dev_status == TRANSPORT_DEVICE_ACTIVATED) ?
- "available" : "notavailable", /* scsiLuStatus */
- "exposed", /* scsiLuState */
- (unsigned long long)dev->num_cmds,
- /* scsiLuReadMegaBytes */
- (u32)(dev->read_bytes >> 20),
- /* scsiLuWrittenMegaBytes */
- (u32)(dev->write_bytes >> 20),
- dev->num_resets, /* scsiLuInResets */
- 0, /* scsiLuOutTaskSetFullStatus */
- 0, /* scsiLuHSInCommands */
- (u32)(((u32)dev->creation_time - INITIAL_JIFFIES) *
- 100 / HZ));
-
- return 0;
-}
-
-static const struct seq_operations scsi_lu_seq_ops = {
- .start = scsi_lu_seq_start,
- .next = scsi_lu_seq_next,
- .stop = scsi_lu_seq_stop,
- .show = scsi_lu_seq_show
-};
-
-static int scsi_lu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &scsi_lu_seq_ops);
-}
-
-static const struct file_operations scsi_lu_seq_fops = {
- .owner = THIS_MODULE,
- .open = scsi_lu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-/****************************************************************************/
-
-/*
- * Remove proc fs entries
- */
-void remove_scsi_target_mib(void)
-{
- remove_proc_entry("scsi_target/mib/scsi_inst", NULL);
- remove_proc_entry("scsi_target/mib/scsi_dev", NULL);
- remove_proc_entry("scsi_target/mib/scsi_port", NULL);
- remove_proc_entry("scsi_target/mib/scsi_transport", NULL);
- remove_proc_entry("scsi_target/mib/scsi_tgt_dev", NULL);
- remove_proc_entry("scsi_target/mib/scsi_tgt_port", NULL);
- remove_proc_entry("scsi_target/mib/scsi_auth_intr", NULL);
- remove_proc_entry("scsi_target/mib/scsi_att_intr_port", NULL);
- remove_proc_entry("scsi_target/mib/scsi_lu", NULL);
- remove_proc_entry("scsi_target/mib", NULL);
-}
-
-/*
- * Create proc fs entries for the mib tables
- */
-int init_scsi_target_mib(void)
-{
- struct proc_dir_entry *dir_entry;
- struct proc_dir_entry *scsi_inst_entry;
- struct proc_dir_entry *scsi_dev_entry;
- struct proc_dir_entry *scsi_port_entry;
- struct proc_dir_entry *scsi_transport_entry;
- struct proc_dir_entry *scsi_tgt_dev_entry;
- struct proc_dir_entry *scsi_tgt_port_entry;
- struct proc_dir_entry *scsi_auth_intr_entry;
- struct proc_dir_entry *scsi_att_intr_port_entry;
- struct proc_dir_entry *scsi_lu_entry;
-
- dir_entry = proc_mkdir("scsi_target/mib", NULL);
- if (!(dir_entry)) {
- printk(KERN_ERR "proc_mkdir() failed.\n");
- return -1;
- }
-
- scsi_inst_entry =
- create_proc_entry("scsi_target/mib/scsi_inst", 0, NULL);
- if (scsi_inst_entry)
- scsi_inst_entry->proc_fops = &scsi_inst_seq_fops;
- else
- goto error;
-
- scsi_dev_entry =
- create_proc_entry("scsi_target/mib/scsi_dev", 0, NULL);
- if (scsi_dev_entry)
- scsi_dev_entry->proc_fops = &scsi_dev_seq_fops;
- else
- goto error;
-
- scsi_port_entry =
- create_proc_entry("scsi_target/mib/scsi_port", 0, NULL);
- if (scsi_port_entry)
- scsi_port_entry->proc_fops = &scsi_port_seq_fops;
- else
- goto error;
-
- scsi_transport_entry =
- create_proc_entry("scsi_target/mib/scsi_transport", 0, NULL);
- if (scsi_transport_entry)
- scsi_transport_entry->proc_fops = &scsi_transport_seq_fops;
- else
- goto error;
-
- scsi_tgt_dev_entry =
- create_proc_entry("scsi_target/mib/scsi_tgt_dev", 0, NULL);
- if (scsi_tgt_dev_entry)
- scsi_tgt_dev_entry->proc_fops = &scsi_tgt_dev_seq_fops;
- else
- goto error;
-
- scsi_tgt_port_entry =
- create_proc_entry("scsi_target/mib/scsi_tgt_port", 0, NULL);
- if (scsi_tgt_port_entry)
- scsi_tgt_port_entry->proc_fops = &scsi_tgt_port_seq_fops;
- else
- goto error;
-
- scsi_auth_intr_entry =
- create_proc_entry("scsi_target/mib/scsi_auth_intr", 0, NULL);
- if (scsi_auth_intr_entry)
- scsi_auth_intr_entry->proc_fops = &scsi_auth_intr_seq_fops;
- else
- goto error;
-
- scsi_att_intr_port_entry =
- create_proc_entry("scsi_target/mib/scsi_att_intr_port", 0, NULL);
- if (scsi_att_intr_port_entry)
- scsi_att_intr_port_entry->proc_fops =
- &scsi_att_intr_port_seq_fops;
- else
- goto error;
-
- scsi_lu_entry = create_proc_entry("scsi_target/mib/scsi_lu", 0, NULL);
- if (scsi_lu_entry)
- scsi_lu_entry->proc_fops = &scsi_lu_seq_fops;
- else
- goto error;
-
- return 0;
-
-error:
- printk(KERN_ERR "create_proc_entry() failed.\n");
- remove_scsi_target_mib();
- return -1;
-}
-
-/*
- * Initialize the index table for allocating unique row indexes to various mib
- * tables
- */
-void init_scsi_index_table(void)
-{
- memset(&scsi_index_table, 0, sizeof(struct scsi_index_table));
- spin_lock_init(&scsi_index_table.lock);
-}
-
-/*
- * Allocate a new row index for the entry type specified
- */
-u32 scsi_get_new_index(scsi_index_t type)
-{
- u32 new_index;
-
- if ((type < 0) || (type >= SCSI_INDEX_TYPE_MAX)) {
- printk(KERN_ERR "Invalid index type %d\n", type);
- return -1;
- }
-
- spin_lock(&scsi_index_table.lock);
- new_index = ++scsi_index_table.scsi_mib_index[type];
- if (new_index == 0)
- new_index = ++scsi_index_table.scsi_mib_index[type];
- spin_unlock(&scsi_index_table.lock);
-
- return new_index;
-}
-EXPORT_SYMBOL(scsi_get_new_index);
diff --git a/drivers/target/target_core_mib.h b/drivers/target/target_core_mib.h
deleted file mode 100644
index 277204633850..000000000000
--- a/drivers/target/target_core_mib.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef TARGET_CORE_MIB_H
-#define TARGET_CORE_MIB_H
-
-typedef enum {
- SCSI_INST_INDEX,
- SCSI_DEVICE_INDEX,
- SCSI_AUTH_INTR_INDEX,
- SCSI_INDEX_TYPE_MAX
-} scsi_index_t;
-
-struct scsi_index_table {
- spinlock_t lock;
- u32 scsi_mib_index[SCSI_INDEX_TYPE_MAX];
-} ____cacheline_aligned;
-
-/* SCSI Port stats */
-struct scsi_port_stats {
- u64 cmd_pdus;
- u64 tx_data_octets;
- u64 rx_data_octets;
-} ____cacheline_aligned;
-
-extern int init_scsi_target_mib(void);
-extern void remove_scsi_target_mib(void);
-extern void init_scsi_index_table(void);
-extern u32 scsi_get_new_index(scsi_index_t);
-
-#endif /*** TARGET_CORE_MIB_H ***/
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 742d24609a9b..f2a08477a68c 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -462,8 +462,8 @@ static struct se_device *pscsi_create_type_disk(
*/
bd = blkdev_get_by_path(se_dev->se_dev_udev_path,
FMODE_WRITE|FMODE_READ|FMODE_EXCL, pdv);
- if (!(bd)) {
- printk("pSCSI: blkdev_get_by_path() failed\n");
+ if (IS_ERR(bd)) {
+ printk(KERN_ERR "pSCSI: blkdev_get_by_path() failed\n");
scsi_device_put(sd);
return NULL;
}
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index 158cecbec718..4a109835e420 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -282,6 +282,9 @@ int core_tmr_lun_reset(
atomic_set(&task->task_active, 0);
atomic_set(&task->task_stop, 0);
+ } else {
+ if (atomic_read(&task->task_execute_queue) != 0)
+ transport_remove_task_from_execute_queue(task, dev);
}
__transport_stop_task_timer(task, &flags);
@@ -301,6 +304,7 @@ int core_tmr_lun_reset(
DEBUG_LR("LUN_RESET: got t_transport_active = 1 for"
" task: %p, t_fe_count: %d dev: %p\n", task,
fe_count, dev);
+ atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock,
flags);
core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -310,6 +314,7 @@ int core_tmr_lun_reset(
}
DEBUG_LR("LUN_RESET: Got t_transport_active = 0 for task: %p,"
" t_fe_count: %d dev: %p\n", task, fe_count, dev);
+ atomic_set(&T_TASK(cmd)->t_transport_aborted, 1);
spin_unlock_irqrestore(&T_TASK(cmd)->t_state_lock, flags);
core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index abfa81a57115..c26f67467623 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -275,7 +275,6 @@ struct se_node_acl *core_tpg_check_initiator_node_acl(
spin_lock_init(&acl->device_list_lock);
spin_lock_init(&acl->nacl_sess_lock);
atomic_set(&acl->acl_pr_ref_count, 0);
- atomic_set(&acl->mib_ref_count, 0);
acl->queue_depth = TPG_TFO(tpg)->tpg_get_default_depth(tpg);
snprintf(acl->initiatorname, TRANSPORT_IQN_LEN, "%s", initiatorname);
acl->se_tpg = tpg;
@@ -318,12 +317,6 @@ void core_tpg_wait_for_nacl_pr_ref(struct se_node_acl *nacl)
cpu_relax();
}
-void core_tpg_wait_for_mib_ref(struct se_node_acl *nacl)
-{
- while (atomic_read(&nacl->mib_ref_count) != 0)
- cpu_relax();
-}
-
void core_tpg_clear_object_luns(struct se_portal_group *tpg)
{
int i, ret;
@@ -480,7 +473,6 @@ int core_tpg_del_initiator_node_acl(
spin_unlock_bh(&tpg->session_lock);
core_tpg_wait_for_nacl_pr_ref(acl);
- core_tpg_wait_for_mib_ref(acl);
core_clear_initiator_node_from_tpg(acl, tpg);
core_free_device_list_for_node(acl, tpg);
@@ -701,6 +693,8 @@ EXPORT_SYMBOL(core_tpg_register);
int core_tpg_deregister(struct se_portal_group *se_tpg)
{
+ struct se_node_acl *nacl, *nacl_tmp;
+
printk(KERN_INFO "TARGET_CORE[%s]: Deallocating %s struct se_portal_group"
" for endpoint: %s Portal Tag %u\n",
(se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) ?
@@ -714,6 +708,25 @@ int core_tpg_deregister(struct se_portal_group *se_tpg)
while (atomic_read(&se_tpg->tpg_pr_ref_count) != 0)
cpu_relax();
+ /*
+ * Release any remaining demo-mode generated se_node_acl that have
+ * not been released because of TFO->tpg_check_demo_mode_cache() == 1
+ * in transport_deregister_session().
+ */
+ spin_lock_bh(&se_tpg->acl_node_lock);
+ list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list,
+ acl_list) {
+ list_del(&nacl->acl_list);
+ se_tpg->num_node_acls--;
+ spin_unlock_bh(&se_tpg->acl_node_lock);
+
+ core_tpg_wait_for_nacl_pr_ref(nacl);
+ core_free_device_list_for_node(nacl, se_tpg);
+ TPG_TFO(se_tpg)->tpg_release_fabric_acl(se_tpg, nacl);
+
+ spin_lock_bh(&se_tpg->acl_node_lock);
+ }
+ spin_unlock_bh(&se_tpg->acl_node_lock);
if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL)
core_tpg_release_virtual_lun0(se_tpg);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 28b6292ff298..4bbf6c147f89 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -379,6 +379,40 @@ void release_se_global(void)
se_global = NULL;
}
+/* SCSI statistics table index */
+static struct scsi_index_table scsi_index_table;
+
+/*
+ * Initialize the index table for allocating unique row indexes to various mib
+ * tables.
+ */
+void init_scsi_index_table(void)
+{
+ memset(&scsi_index_table, 0, sizeof(struct scsi_index_table));
+ spin_lock_init(&scsi_index_table.lock);
+}
+
+/*
+ * Allocate a new row index for the entry type specified
+ */
+u32 scsi_get_new_index(scsi_index_t type)
+{
+ u32 new_index;
+
+ if ((type < 0) || (type >= SCSI_INDEX_TYPE_MAX)) {
+ printk(KERN_ERR "Invalid index type %d\n", type);
+ return -EINVAL;
+ }
+
+ spin_lock(&scsi_index_table.lock);
+ new_index = ++scsi_index_table.scsi_mib_index[type];
+ if (new_index == 0)
+ new_index = ++scsi_index_table.scsi_mib_index[type];
+ spin_unlock(&scsi_index_table.lock);
+
+ return new_index;
+}
+
void transport_init_queue_obj(struct se_queue_obj *qobj)
{
atomic_set(&qobj->queue_cnt, 0);
@@ -437,7 +471,6 @@ struct se_session *transport_init_session(void)
}
INIT_LIST_HEAD(&se_sess->sess_list);
INIT_LIST_HEAD(&se_sess->sess_acl_list);
- atomic_set(&se_sess->mib_ref_count, 0);
return se_sess;
}
@@ -546,12 +579,6 @@ void transport_deregister_session(struct se_session *se_sess)
transport_free_session(se_sess);
return;
}
- /*
- * Wait for possible reference in drivers/target/target_core_mib.c:
- * scsi_att_intr_port_seq_show()
- */
- while (atomic_read(&se_sess->mib_ref_count) != 0)
- cpu_relax();
spin_lock_bh(&se_tpg->session_lock);
list_del(&se_sess->sess_list);
@@ -574,7 +601,6 @@ void transport_deregister_session(struct se_session *se_sess)
spin_unlock_bh(&se_tpg->acl_node_lock);
core_tpg_wait_for_nacl_pr_ref(se_nacl);
- core_tpg_wait_for_mib_ref(se_nacl);
core_free_device_list_for_node(se_nacl, se_tpg);
TPG_TFO(se_tpg)->tpg_release_fabric_acl(se_tpg,
se_nacl);
@@ -1181,7 +1207,7 @@ transport_get_task_from_execute_queue(struct se_device *dev)
*
*
*/
-static void transport_remove_task_from_execute_queue(
+void transport_remove_task_from_execute_queue(
struct se_task *task,
struct se_device *dev)
{
@@ -4827,6 +4853,8 @@ static int transport_do_se_mem_map(
return ret;
}
+
+ BUG_ON(list_empty(se_mem_list));
/*
* This is the normal path for all normal non BIDI and BIDI-COMMAND
* WRITE payloads.. If we need to do BIDI READ passthrough for
@@ -5008,7 +5036,9 @@ transport_map_control_cmd_to_task(struct se_cmd *cmd)
struct se_mem *se_mem = NULL, *se_mem_lout = NULL;
u32 se_mem_cnt = 0, task_offset = 0;
- BUG_ON(list_empty(cmd->t_task->t_mem_list));
+ if (!list_empty(T_TASK(cmd)->t_mem_list))
+ se_mem = list_entry(T_TASK(cmd)->t_mem_list->next,
+ struct se_mem, se_list);
ret = transport_do_se_mem_map(dev, task,
cmd->t_task->t_mem_list, NULL, se_mem,
@@ -5519,7 +5549,8 @@ static void transport_generic_wait_for_tasks(
atomic_set(&T_TASK(cmd)->transport_lun_stop, 0);
}
- if (!atomic_read(&T_TASK(cmd)->t_transport_active))
+ if (!atomic_read(&T_TASK(cmd)->t_transport_active) ||
+ atomic_read(&T_TASK(cmd)->t_transport_aborted))
goto remove;
atomic_set(&T_TASK(cmd)->t_transport_stop, 1);
@@ -5926,6 +5957,9 @@ static void transport_processing_shutdown(struct se_device *dev)
atomic_set(&task->task_active, 0);
atomic_set(&task->task_stop, 0);
+ } else {
+ if (atomic_read(&task->task_execute_queue) != 0)
+ transport_remove_task_from_execute_queue(task, dev);
}
__transport_stop_task_timer(task, &flags);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f7a5dba3ca23..bf7c687519ef 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -4,7 +4,6 @@
menuconfig THERMAL
tristate "Generic Thermal sysfs driver"
- depends on NET
help
Generic Thermal Sysfs driver offers a generic mechanism for
thermal management. Usually it's made up of one or more thermal
diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c
index 7d0e63c79280..713b7ea4a607 100644
--- a/drivers/thermal/thermal_sys.c
+++ b/drivers/thermal/thermal_sys.c
@@ -62,20 +62,6 @@ static DEFINE_MUTEX(thermal_list_lock);
static unsigned int thermal_event_seqnum;
-static struct genl_family thermal_event_genl_family = {
- .id = GENL_ID_GENERATE,
- .name = THERMAL_GENL_FAMILY_NAME,
- .version = THERMAL_GENL_VERSION,
- .maxattr = THERMAL_GENL_ATTR_MAX,
-};
-
-static struct genl_multicast_group thermal_event_mcgrp = {
- .name = THERMAL_GENL_MCAST_GROUP_NAME,
-};
-
-static int genetlink_init(void);
-static void genetlink_exit(void);
-
static int get_idr(struct idr *idr, struct mutex *lock, int *id)
{
int err;
@@ -1225,6 +1211,18 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
EXPORT_SYMBOL(thermal_zone_device_unregister);
+#ifdef CONFIG_NET
+static struct genl_family thermal_event_genl_family = {
+ .id = GENL_ID_GENERATE,
+ .name = THERMAL_GENL_FAMILY_NAME,
+ .version = THERMAL_GENL_VERSION,
+ .maxattr = THERMAL_GENL_ATTR_MAX,
+};
+
+static struct genl_multicast_group thermal_event_mcgrp = {
+ .name = THERMAL_GENL_MCAST_GROUP_NAME,
+};
+
int generate_netlink_event(u32 orig, enum events event)
{
struct sk_buff *skb;
@@ -1301,6 +1299,15 @@ static int genetlink_init(void)
return result;
}
+static void genetlink_exit(void)
+{
+ genl_unregister_family(&thermal_event_genl_family);
+}
+#else /* !CONFIG_NET */
+static inline int genetlink_init(void) { return 0; }
+static inline void genetlink_exit(void) {}
+#endif /* !CONFIG_NET */
+
static int __init thermal_init(void)
{
int result = 0;
@@ -1316,11 +1323,6 @@ static int __init thermal_init(void)
return result;
}
-static void genetlink_exit(void)
-{
- genl_unregister_family(&thermal_event_genl_family);
-}
-
static void __exit thermal_exit(void)
{
class_unregister(&thermal_class);
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index beb1afa27d8d..7b951adac54b 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -601,7 +601,7 @@ static int max3100_startup(struct uart_port *port)
s->rts = 0;
sprintf(b, "max3100-%d", s->minor);
- s->workqueue = create_freezeable_workqueue(b);
+ s->workqueue = create_freezable_workqueue(b);
if (!s->workqueue) {
dev_warn(&s->spi->dev, "cannot create workqueue\n");
return -EBUSY;
diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c
index 910870edf708..750b4f627315 100644
--- a/drivers/tty/serial/max3107.c
+++ b/drivers/tty/serial/max3107.c
@@ -833,7 +833,7 @@ static int max3107_startup(struct uart_port *port)
struct max3107_port *s = container_of(port, struct max3107_port, port);
/* Initialize work queue */
- s->workqueue = create_freezeable_workqueue("max3107");
+ s->workqueue = create_freezable_workqueue("max3107");
if (!s->workqueue) {
dev_err(&s->spi->dev, "Workqueue creation failed\n");
return -EBUSY;
diff --git a/drivers/tty/serial/serial_cs.c b/drivers/tty/serial/serial_cs.c
index 93760b2ea172..1ef4df9bf7e4 100644
--- a/drivers/tty/serial/serial_cs.c
+++ b/drivers/tty/serial/serial_cs.c
@@ -712,6 +712,7 @@ static struct pcmcia_device_id serial_ids[] = {
PCMCIA_PFC_DEVICE_PROD_ID12(1, "Xircom", "CreditCard Ethernet+Modem II", 0x2e3ee845, 0xeca401bf),
PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0e01),
PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0a05),
+ PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x0b05),
PCMCIA_PFC_DEVICE_MANF_CARD(1, 0x0032, 0x1101),
PCMCIA_MFC_DEVICE_MANF_CARD(0, 0x0104, 0x0070),
PCMCIA_MFC_DEVICE_MANF_CARD(1, 0x0101, 0x0562),
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index f71e8e307e0f..64a035ba2eab 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -335,7 +335,7 @@ void usb_hcd_pci_shutdown(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(usb_hcd_pci_shutdown);
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
#ifdef CONFIG_PPC_PMAC
static void powermac_set_asic(struct pci_dev *pci_dev, int enable)
@@ -580,4 +580,4 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = {
};
EXPORT_SYMBOL_GPL(usb_hcd_pci_pm_ops);
-#endif /* CONFIG_PM_OPS */
+#endif /* CONFIG_PM */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d041c6826e43..19d3435e6140 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1465,6 +1465,7 @@ void usb_set_device_state(struct usb_device *udev,
enum usb_device_state new_state)
{
unsigned long flags;
+ int wakeup = -1;
spin_lock_irqsave(&device_state_lock, flags);
if (udev->state == USB_STATE_NOTATTACHED)
@@ -1479,11 +1480,10 @@ void usb_set_device_state(struct usb_device *udev,
|| new_state == USB_STATE_SUSPENDED)
; /* No change to wakeup settings */
else if (new_state == USB_STATE_CONFIGURED)
- device_set_wakeup_capable(&udev->dev,
- (udev->actconfig->desc.bmAttributes
- & USB_CONFIG_ATT_WAKEUP));
+ wakeup = udev->actconfig->desc.bmAttributes
+ & USB_CONFIG_ATT_WAKEUP;
else
- device_set_wakeup_capable(&udev->dev, 0);
+ wakeup = 0;
}
if (udev->state == USB_STATE_SUSPENDED &&
new_state != USB_STATE_SUSPENDED)
@@ -1495,6 +1495,8 @@ void usb_set_device_state(struct usb_device *udev,
} else
recursively_mark_NOTATTACHED(udev);
spin_unlock_irqrestore(&device_state_lock, flags);
+ if (wakeup >= 0)
+ device_set_wakeup_capable(&udev->dev, wakeup);
}
EXPORT_SYMBOL_GPL(usb_set_device_state);
@@ -2681,17 +2683,13 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
mutex_lock(&usb_address0_mutex);
- if (!udev->config && oldspeed == USB_SPEED_SUPER) {
- /* Don't reset USB 3.0 devices during an initial setup */
- usb_set_device_state(udev, USB_STATE_DEFAULT);
- } else {
- /* Reset the device; full speed may morph to high speed */
- /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
- retval = hub_port_reset(hub, port1, udev, delay);
- if (retval < 0) /* error or disconnect */
- goto fail;
- /* success, speed is known */
- }
+ /* Reset the device; full speed may morph to high speed */
+ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
+ retval = hub_port_reset(hub, port1, udev, delay);
+ if (retval < 0) /* error or disconnect */
+ goto fail;
+ /* success, speed is known */
+
retval = -ENODEV;
if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed) {
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 44c595432d6f..81ce6a8e1d94 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -48,6 +48,10 @@ static const struct usb_device_id usb_quirk_list[] = {
{ USB_DEVICE(0x04b4, 0x0526), .driver_info =
USB_QUIRK_CONFIG_INTF_STRINGS },
+ /* Samsung Android phone modem - ID conflict with SPH-I500 */
+ { USB_DEVICE(0x04e8, 0x6601), .driver_info =
+ USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* Roland SC-8820 */
{ USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME },
@@ -68,6 +72,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* M-Systems Flash Disk Pioneers */
{ USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* Keytouch QWERTY Panel keyboard */
+ { USB_DEVICE(0x0926, 0x3333), .driver_info =
+ USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */
{ USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF },
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 3c6e1a058745..5e1495097ec3 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -346,14 +346,19 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
if (unlikely(!skb))
break;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0,
- req->actual);
- page = NULL;
- if (req->actual < req->length) { /* Last fragment */
+ if (skb->len == 0) { /* First fragment */
skb->protocol = htons(ETH_P_PHONET);
skb_reset_mac_header(skb);
- pskb_pull(skb, 1);
+ /* Can't use pskb_pull() on page in IRQ */
+ memcpy(skb_put(skb, 1), page_address(page), 1);
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ skb->len == 0, req->actual);
+ page = NULL;
+
+ if (req->actual < req->length) { /* Last fragment */
skb->dev = dev;
dev->stats.rx_packets++;
dev->stats.rx_bytes += skb->len;
diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c
index e8f4f36fdf0b..a6f21b891f68 100644
--- a/drivers/usb/host/ehci-xilinx-of.c
+++ b/drivers/usb/host/ehci-xilinx-of.c
@@ -29,6 +29,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/of_address.h>
/**
* ehci_xilinx_of_setup - Initialize the device for ehci_reset()
diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index fcbf4abbf381..0231814a97a5 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -169,9 +169,10 @@ static void xhci_print_ports(struct xhci_hcd *xhci)
}
}
-void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num)
+void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num)
{
- void *addr;
+ struct xhci_intr_reg __iomem *ir_set = &xhci->run_regs->ir_set[set_num];
+ void __iomem *addr;
u32 temp;
u64 temp_64;
@@ -449,7 +450,7 @@ char *xhci_get_slot_state(struct xhci_hcd *xhci,
}
}
-void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
+static void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
{
/* Fields are 32 bits wide, DMA addresses are in bytes */
int field_size = 32 / 8;
@@ -488,7 +489,7 @@ void xhci_dbg_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx)
dbg_rsvd64(xhci, (u64 *)slot_ctx, dma);
}
-void xhci_dbg_ep_ctx(struct xhci_hcd *xhci,
+static void xhci_dbg_ep_ctx(struct xhci_hcd *xhci,
struct xhci_container_ctx *ctx,
unsigned int last_ep)
{
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 1d0f45f0e7a6..a9534396e85b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -307,7 +307,7 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci,
/***************** Streams structures manipulation *************************/
-void xhci_free_stream_ctx(struct xhci_hcd *xhci,
+static void xhci_free_stream_ctx(struct xhci_hcd *xhci,
unsigned int num_stream_ctxs,
struct xhci_stream_ctx *stream_ctx, dma_addr_t dma)
{
@@ -335,7 +335,7 @@ void xhci_free_stream_ctx(struct xhci_hcd *xhci,
* The stream context array must be a power of 2, and can be as small as
* 64 bytes or as large as 1MB.
*/
-struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci,
+static struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci,
unsigned int num_stream_ctxs, dma_addr_t *dma,
gfp_t mem_flags)
{
@@ -1900,11 +1900,11 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
val &= DBOFF_MASK;
xhci_dbg(xhci, "// Doorbell array is located at offset 0x%x"
" from cap regs base addr\n", val);
- xhci->dba = (void *) xhci->cap_regs + val;
+ xhci->dba = (void __iomem *) xhci->cap_regs + val;
xhci_dbg_regs(xhci);
xhci_print_run_regs(xhci);
/* Set ir_set to interrupt register set 0 */
- xhci->ir_set = (void *) xhci->run_regs->ir_set;
+ xhci->ir_set = &xhci->run_regs->ir_set[0];
/*
* Event ring setup: Allocate a normal ring, but also setup
@@ -1961,7 +1961,7 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
/* Set the event ring dequeue address */
xhci_set_hc_event_deq(xhci);
xhci_dbg(xhci, "Wrote ERST address to ir_set 0.\n");
- xhci_print_ir_set(xhci, xhci->ir_set, 0);
+ xhci_print_ir_set(xhci, 0);
/*
* XXX: Might need to set the Interrupter Moderation Register to
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 3e8211c1ce5a..3289bf4832c9 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -474,8 +474,11 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
state->new_deq_seg = find_trb_seg(cur_td->start_seg,
dev->eps[ep_index].stopped_trb,
&state->new_cycle_state);
- if (!state->new_deq_seg)
- BUG();
+ if (!state->new_deq_seg) {
+ WARN_ON(1);
+ return;
+ }
+
/* Dig out the cycle state saved by the xHC during the stop ep cmd */
xhci_dbg(xhci, "Finding endpoint context\n");
ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
@@ -486,8 +489,10 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
state->new_deq_seg = find_trb_seg(state->new_deq_seg,
state->new_deq_ptr,
&state->new_cycle_state);
- if (!state->new_deq_seg)
- BUG();
+ if (!state->new_deq_seg) {
+ WARN_ON(1);
+ return;
+ }
trb = &state->new_deq_ptr->generic;
if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) &&
@@ -2363,12 +2368,13 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
/* Scatter gather list entries may cross 64KB boundaries */
running_total = TRB_MAX_BUFF_SIZE -
- (sg_dma_address(sg) & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
+ (sg_dma_address(sg) & (TRB_MAX_BUFF_SIZE - 1));
+ running_total &= TRB_MAX_BUFF_SIZE - 1;
if (running_total != 0)
num_trbs++;
/* How many more 64KB chunks to transfer, how many more TRBs? */
- while (running_total < sg_dma_len(sg)) {
+ while (running_total < sg_dma_len(sg) && running_total < temp) {
num_trbs++;
running_total += TRB_MAX_BUFF_SIZE;
}
@@ -2394,11 +2400,11 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
static void check_trb_math(struct urb *urb, int num_trbs, int running_total)
{
if (num_trbs != 0)
- dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated number of "
+ dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated number of "
"TRBs, %d left\n", __func__,
urb->ep->desc.bEndpointAddress, num_trbs);
if (running_total != urb->transfer_buffer_length)
- dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, "
+ dev_err(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, "
"queued %#x (%d), asked for %#x (%d)\n",
__func__,
urb->ep->desc.bEndpointAddress,
@@ -2533,8 +2539,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
sg = urb->sg;
addr = (u64) sg_dma_address(sg);
this_sg_len = sg_dma_len(sg);
- trb_buff_len = TRB_MAX_BUFF_SIZE -
- (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
+ trb_buff_len = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
if (trb_buff_len > urb->transfer_buffer_length)
trb_buff_len = urb->transfer_buffer_length;
@@ -2572,7 +2577,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
(unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
(unsigned int) addr + trb_buff_len);
if (TRB_MAX_BUFF_SIZE -
- (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)) < trb_buff_len) {
+ (addr & (TRB_MAX_BUFF_SIZE - 1)) < trb_buff_len) {
xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n");
xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n",
(unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
@@ -2616,7 +2621,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
}
trb_buff_len = TRB_MAX_BUFF_SIZE -
- (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
+ (addr & (TRB_MAX_BUFF_SIZE - 1));
trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
if (running_total + trb_buff_len > urb->transfer_buffer_length)
trb_buff_len =
@@ -2656,7 +2661,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
num_trbs = 0;
/* How much data is (potentially) left before the 64KB boundary? */
running_total = TRB_MAX_BUFF_SIZE -
- (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
+ (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
+ running_total &= TRB_MAX_BUFF_SIZE - 1;
/* If there's some data on this 64KB chunk, or we have to send a
* zero-length transfer, we need at least one TRB
@@ -2700,8 +2706,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
/* How much data is in the first TRB? */
addr = (u64) urb->transfer_dma;
trb_buff_len = TRB_MAX_BUFF_SIZE -
- (urb->transfer_dma & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
- if (urb->transfer_buffer_length < trb_buff_len)
+ (urb->transfer_dma & (TRB_MAX_BUFF_SIZE - 1));
+ if (trb_buff_len > urb->transfer_buffer_length)
trb_buff_len = urb->transfer_buffer_length;
first_trb = true;
@@ -2879,8 +2885,8 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci,
addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset);
td_len = urb->iso_frame_desc[i].length;
- running_total = TRB_MAX_BUFF_SIZE -
- (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
+ running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1));
+ running_total &= TRB_MAX_BUFF_SIZE - 1;
if (running_total != 0)
num_trbs++;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 34cf4e165877..2083fc2179b2 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -109,7 +109,7 @@ int xhci_halt(struct xhci_hcd *xhci)
/*
* Set the run bit and wait for the host to be running.
*/
-int xhci_start(struct xhci_hcd *xhci)
+static int xhci_start(struct xhci_hcd *xhci)
{
u32 temp;
int ret;
@@ -329,7 +329,7 @@ int xhci_init(struct usb_hcd *hcd)
#ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
-void xhci_event_ring_work(unsigned long arg)
+static void xhci_event_ring_work(unsigned long arg)
{
unsigned long flags;
int temp;
@@ -473,7 +473,7 @@ int xhci_run(struct usb_hcd *hcd)
xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp));
xhci_writel(xhci, ER_IRQ_ENABLE(temp),
&xhci->ir_set->irq_pending);
- xhci_print_ir_set(xhci, xhci->ir_set, 0);
+ xhci_print_ir_set(xhci, 0);
if (NUM_TEST_NOOPS > 0)
doorbell = xhci_setup_one_noop(xhci);
@@ -528,7 +528,7 @@ void xhci_stop(struct usb_hcd *hcd)
temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
xhci_writel(xhci, ER_IRQ_DISABLE(temp),
&xhci->ir_set->irq_pending);
- xhci_print_ir_set(xhci, xhci->ir_set, 0);
+ xhci_print_ir_set(xhci, 0);
xhci_dbg(xhci, "cleaning up memory\n");
xhci_mem_cleanup(xhci);
@@ -755,7 +755,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
temp = xhci_readl(xhci, &xhci->ir_set->irq_pending);
xhci_writel(xhci, ER_IRQ_DISABLE(temp),
&xhci->ir_set->irq_pending);
- xhci_print_ir_set(xhci, xhci->ir_set, 0);
+ xhci_print_ir_set(xhci, 0);
xhci_dbg(xhci, "cleaning up memory\n");
xhci_mem_cleanup(xhci);
@@ -857,7 +857,7 @@ unsigned int xhci_last_valid_endpoint(u32 added_ctxs)
/* Returns 1 if the arguments are OK;
* returns 0 this is a root hub; returns -EINVAL for NULL pointers.
*/
-int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
+static int xhci_check_args(struct usb_hcd *hcd, struct usb_device *udev,
struct usb_host_endpoint *ep, int check_ep, bool check_virt_dev,
const char *func) {
struct xhci_hcd *xhci;
@@ -1693,7 +1693,7 @@ static void xhci_setup_input_ctx_for_config_ep(struct xhci_hcd *xhci,
xhci_dbg_ctx(xhci, in_ctx, xhci_last_valid_endpoint(add_flags));
}
-void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
+static void xhci_setup_input_ctx_for_quirk(struct xhci_hcd *xhci,
unsigned int slot_id, unsigned int ep_index,
struct xhci_dequeue_state *deq_state)
{
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 7f236fd22015..7f127df6dd55 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1348,7 +1348,7 @@ static inline int xhci_link_trb_quirk(struct xhci_hcd *xhci)
}
/* xHCI debugging */
-void xhci_print_ir_set(struct xhci_hcd *xhci, struct xhci_intr_reg *ir_set, int set_num);
+void xhci_print_ir_set(struct xhci_hcd *xhci, int set_num);
void xhci_print_registers(struct xhci_hcd *xhci);
void xhci_dbg_regs(struct xhci_hcd *xhci);
void xhci_print_run_regs(struct xhci_hcd *xhci);
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 54a8bd1047d6..c292d5c499e7 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1864,6 +1864,7 @@ allocate_instance(struct device *dev,
INIT_LIST_HEAD(&musb->out_bulk);
hcd->uses_new_polling = 1;
+ hcd->has_tt = 1;
musb->vbuserr_retry = VBUSERR_RETRY_COUNT;
musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON;
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index d74a8113ae74..e6400be8a0f8 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -488,6 +488,15 @@ struct musb {
unsigned set_address:1;
unsigned test_mode:1;
unsigned softconnect:1;
+
+ u8 address;
+ u8 test_mode_nr;
+ u16 ackpend; /* ep0 */
+ enum musb_g_ep0_state ep0_state;
+ struct usb_gadget g; /* the gadget */
+ struct usb_gadget_driver *gadget_driver; /* its driver */
+#endif
+
/*
* FIXME: Remove this flag.
*
@@ -501,14 +510,6 @@ struct musb {
*/
unsigned double_buffer_not_ok:1 __deprecated;
- u8 address;
- u8 test_mode_nr;
- u16 ackpend; /* ep0 */
- enum musb_g_ep0_state ep0_state;
- struct usb_gadget g; /* the gadget */
- struct usb_gadget_driver *gadget_driver; /* its driver */
-#endif
-
struct musb_hdrc_config *config;
#ifdef MUSB_CONFIG_PROC_FS
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index a3f12333fc41..bc8badd16897 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -362,6 +362,7 @@ static int omap2430_musb_init(struct musb *musb)
static int omap2430_musb_exit(struct musb *musb)
{
+ del_timer_sync(&musb_idle_timer);
omap2430_low_level_exit(musb);
otg_put_transceiver(musb->xceiv);
diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c
index 7481ff8a49e4..0457813eebee 100644
--- a/drivers/usb/serial/sierra.c
+++ b/drivers/usb/serial/sierra.c
@@ -301,6 +301,9 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */
.driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
},
+ { USB_DEVICE(0x0f3d, 0x68A3), /* Airprime/Sierra Wireless Direct IP modems */
+ .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+ },
{ USB_DEVICE(0x413C, 0x08133) }, /* Dell Computer Corp. Wireless 5720 VZW Mobile Broadband (EVDO Rev-A) Minicard GPS Port */
{ }
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index b004b2a485c3..9c014e2ecd68 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -295,12 +295,15 @@ static void usb_wwan_indat_callback(struct urb *urb)
__func__, status, endpoint);
} else {
tty = tty_port_tty_get(&port->port);
- if (urb->actual_length) {
- tty_insert_flip_string(tty, data, urb->actual_length);
- tty_flip_buffer_push(tty);
- } else
- dbg("%s: empty read urb received", __func__);
- tty_kref_put(tty);
+ if (tty) {
+ if (urb->actual_length) {
+ tty_insert_flip_string(tty, data,
+ urb->actual_length);
+ tty_flip_buffer_push(tty);
+ } else
+ dbg("%s: empty read urb received", __func__);
+ tty_kref_put(tty);
+ }
/* Resubmit urb so we continue receiving */
if (status != -ESHUTDOWN) {
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 15a5d89b7f39..1c11959a7d58 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/usb.h>
#include <linux/usb/serial.h>
+#include <linux/usb/cdc.h>
#include "visor.h"
/*
@@ -479,6 +480,17 @@ static int visor_probe(struct usb_serial *serial,
dbg("%s", __func__);
+ /*
+ * some Samsung Android phones in modem mode have the same ID
+ * as SPH-I500, but they are ACM devices, so dont bind to them
+ */
+ if (id->idVendor == SAMSUNG_VENDOR_ID &&
+ id->idProduct == SAMSUNG_SPH_I500_ID &&
+ serial->dev->descriptor.bDeviceClass == USB_CLASS_COMM &&
+ serial->dev->descriptor.bDeviceSubClass ==
+ USB_CDC_SUBCLASS_ACM)
+ return -ENODEV;
+
if (serial->dev->actconfig->desc.bConfigurationValue != 1) {
dev_err(&serial->dev->dev, "active config #%d != 1 ??\n",
serial->dev->actconfig->desc.bConfigurationValue);
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index 8010aaeb5adb..dd0e84a9bd2f 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -239,11 +239,15 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
lcd->spi = spi;
lcd->power = FB_BLANK_POWERDOWN;
lcd->buffer = kzalloc(8, GFP_KERNEL);
+ if (!lcd->buffer) {
+ ret = -ENOMEM;
+ goto out_free_lcd;
+ }
ld = lcd_device_register("ltv350qv", &spi->dev, lcd, &ltv_ops);
if (IS_ERR(ld)) {
ret = PTR_ERR(ld);
- goto out_free_lcd;
+ goto out_free_buffer;
}
lcd->ld = ld;
@@ -257,6 +261,8 @@ static int __devinit ltv350qv_probe(struct spi_device *spi)
out_unregister:
lcd_device_unregister(ld);
+out_free_buffer:
+ kfree(lcd->buffer);
out_free_lcd:
kfree(lcd);
return ret;
@@ -268,6 +274,7 @@ static int __devexit ltv350qv_remove(struct spi_device *spi)
ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
lcd_device_unregister(lcd->ld);
+ kfree(lcd->buffer);
kfree(lcd);
return 0;
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index eca855a55c0d..3de4ba0260a5 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -646,7 +646,7 @@ static int __devexit cpwd_remove(struct platform_device *op)
struct cpwd *p = dev_get_drvdata(&op->dev);
int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < WD_NUMDEVS; i++) {
misc_deregister(&p->devs[i].misc);
if (!p->enabled) {
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 24b966d5061a..204a5603c4ae 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -710,7 +710,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
return 0;
}
-static void __devexit hpwdt_exit_nmi_decoding(void)
+static void hpwdt_exit_nmi_decoding(void)
{
unregister_die_notifier(&die_notifier);
if (cru_rom_addr)
@@ -726,7 +726,7 @@ static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev)
return 0;
}
-static void __devexit hpwdt_exit_nmi_decoding(void)
+static void hpwdt_exit_nmi_decoding(void)
{
}
#endif /* CONFIG_HPWDT_NMI_DECODING */
diff --git a/drivers/watchdog/sbc_fitpc2_wdt.c b/drivers/watchdog/sbc_fitpc2_wdt.c
index c7d67e9a7465..79906255eeb6 100644
--- a/drivers/watchdog/sbc_fitpc2_wdt.c
+++ b/drivers/watchdog/sbc_fitpc2_wdt.c
@@ -201,11 +201,14 @@ static struct miscdevice fitpc2_wdt_miscdev = {
static int __init fitpc2_wdt_init(void)
{
int err;
+ const char *brd_name;
- if (!strstr(dmi_get_system_info(DMI_BOARD_NAME), "SBC-FITPC2"))
+ brd_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+ if (!brd_name || !strstr(brd_name, "SBC-FITPC2"))
return -ENODEV;
- pr_info("%s found\n", dmi_get_system_info(DMI_BOARD_NAME));
+ pr_info("%s found\n", brd_name);
if (!request_region(COMMAND_PORT, 1, WATCHDOG_NAME)) {
pr_err("I/O address 0x%04x already in use\n", COMMAND_PORT);
diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c
index 0461858e07d0..b61ab1c54293 100644
--- a/drivers/watchdog/sch311x_wdt.c
+++ b/drivers/watchdog/sch311x_wdt.c
@@ -508,7 +508,7 @@ static int __init sch311x_detect(int sio_config_port, unsigned short *addr)
sch311x_sio_outb(sio_config_port, 0x07, 0x0a);
/* Check if Logical Device Register is currently active */
- if (sch311x_sio_inb(sio_config_port, 0x30) && 0x01 == 0)
+ if ((sch311x_sio_inb(sio_config_port, 0x30) & 0x01) == 0)
printk(KERN_INFO PFX "Seems that LDN 0x0a is not active...\n");
/* Get the base address of the runtime registers */
diff --git a/drivers/watchdog/w83697ug_wdt.c b/drivers/watchdog/w83697ug_wdt.c
index a6c12dec91a1..df2a64dc9672 100644
--- a/drivers/watchdog/w83697ug_wdt.c
+++ b/drivers/watchdog/w83697ug_wdt.c
@@ -109,7 +109,7 @@ static int w83697ug_select_wd_register(void)
outb_p(0x08, WDT_EFDR); /* select logical device 8 (GPIO2) */
outb_p(0x30, WDT_EFER); /* select CR30 */
c = inb_p(WDT_EFDR);
- outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
+ outb_p(c | 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */
return 0;
}
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 43f9f02c7db0..718050ace08f 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -232,7 +232,7 @@ static int increase_reservation(unsigned long nr_pages)
set_phys_to_machine(pfn, frame_list[i]);
/* Link back into the page tables if not highmem. */
- if (pfn < max_low_pfn) {
+ if (!xen_hvm_domain() && pfn < max_low_pfn) {
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
@@ -280,7 +280,7 @@ static int decrease_reservation(unsigned long nr_pages)
scrub_page(page);
- if (!PageHighMem(page)) {
+ if (!xen_hvm_domain() && !PageHighMem(page)) {
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
__pte_ma(0), 0);
@@ -296,7 +296,7 @@ static int decrease_reservation(unsigned long nr_pages)
/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
pfn = mfn_to_pfn(frame_list[i]);
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ __set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(pfn_to_page(pfn));
}
@@ -392,15 +392,19 @@ static struct notifier_block xenstore_notifier;
static int __init balloon_init(void)
{
- unsigned long pfn, extra_pfn_end;
+ unsigned long pfn, nr_pages, extra_pfn_end;
struct page *page;
- if (!xen_pv_domain())
+ if (!xen_domain())
return -ENODEV;
pr_info("xen_balloon: Initialising balloon driver.\n");
- balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ if (xen_pv_domain())
+ nr_pages = xen_start_info->nr_pages;
+ else
+ nr_pages = max_pfn;
+ balloon_stats.current_pages = min(nr_pages, max_pfn);
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 74681478100a..0ad1699a1b3e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -114,7 +114,7 @@ struct cpu_evtchn_s {
static __initdata struct cpu_evtchn_s init_evtchn_mask = {
.bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul,
};
-static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask;
+static struct cpu_evtchn_s __refdata *cpu_evtchn_mask_p = &init_evtchn_mask;
static inline unsigned long *cpu_evtchn_mask(int cpu)
{
@@ -277,7 +277,7 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
+ cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif
clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq)));
@@ -294,7 +294,7 @@ static void init_evtchn_cpu_bindings(void)
/* By default all event channels notify CPU#0. */
for_each_irq_desc(i, desc) {
- cpumask_copy(desc->affinity, cpumask_of(0));
+ cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
}
#endif
@@ -376,81 +376,69 @@ static void unmask_evtchn(int port)
put_cpu();
}
-static int get_nr_hw_irqs(void)
+static int xen_allocate_irq_dynamic(void)
{
- int ret = 1;
+ int first = 0;
+ int irq;
#ifdef CONFIG_X86_IO_APIC
- ret = get_nr_irqs_gsi();
+ /*
+ * For an HVM guest or domain 0 which see "real" (emulated or
+ * actual respectively) GSIs we allocate dynamic IRQs
+ * e.g. those corresponding to event channels or MSIs
+ * etc. from the range above those "real" GSIs to avoid
+ * collisions.
+ */
+ if (xen_initial_domain() || xen_hvm_domain())
+ first = get_nr_irqs_gsi();
#endif
- return ret;
-}
+retry:
+ irq = irq_alloc_desc_from(first, -1);
-static int find_unbound_pirq(int type)
-{
- int rc, i;
- struct physdev_get_free_pirq op_get_free_pirq;
- op_get_free_pirq.type = type;
+ if (irq == -ENOMEM && first > NR_IRQS_LEGACY) {
+ printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n");
+ first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY);
+ goto retry;
+ }
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- if (!rc)
- return op_get_free_pirq.pirq;
+ if (irq < 0)
+ panic("No available IRQ to bind to: increase nr_irqs!\n");
- for (i = 0; i < nr_irqs; i++) {
- if (pirq_to_irq[i] < 0)
- return i;
- }
- return -1;
+ return irq;
}
-static int find_unbound_irq(void)
+static int xen_allocate_irq_gsi(unsigned gsi)
{
- struct irq_data *data;
- int irq, res;
- int bottom = get_nr_hw_irqs();
- int top = nr_irqs-1;
-
- if (bottom == nr_irqs)
- goto no_irqs;
+ int irq;
- /* This loop starts from the top of IRQ space and goes down.
- * We need this b/c if we have a PCI device in a Xen PV guest
- * we do not have an IO-APIC (though the backend might have them)
- * mapped in. To not have a collision of physical IRQs with the Xen
- * event channels start at the top of the IRQ space for virtual IRQs.
+ /*
+ * A PV guest has no concept of a GSI (since it has no ACPI
+ * nor access to/knowledge of the physical APICs). Therefore
+ * all IRQs are dynamically allocated from the entire IRQ
+ * space.
*/
- for (irq = top; irq > bottom; irq--) {
- data = irq_get_irq_data(irq);
- /* only 15->0 have init'd desc; handle irq > 16 */
- if (!data)
- break;
- if (data->chip == &no_irq_chip)
- break;
- if (data->chip != &xen_dynamic_chip)
- continue;
- if (irq_info[irq].type == IRQT_UNBOUND)
- return irq;
- }
-
- if (irq == bottom)
- goto no_irqs;
+ if (xen_pv_domain() && !xen_initial_domain())
+ return xen_allocate_irq_dynamic();
- res = irq_alloc_desc_at(irq, -1);
+ /* Legacy IRQ descriptors are already allocated by the arch. */
+ if (gsi < NR_IRQS_LEGACY)
+ return gsi;
- if (WARN_ON(res != irq))
- return -1;
+ irq = irq_alloc_desc_at(gsi, -1);
+ if (irq < 0)
+ panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq);
return irq;
-
-no_irqs:
- panic("No available IRQ to bind to: increase nr_irqs!\n");
}
-static bool identity_mapped_irq(unsigned irq)
+static void xen_free_irq(unsigned irq)
{
- /* identity map all the hardware irqs */
- return irq < get_nr_hw_irqs();
+ /* Legacy IRQ descriptors are managed by the arch. */
+ if (irq < NR_IRQS_LEGACY)
+ return;
+
+ irq_free_desc(irq);
}
static void pirq_unmask_notify(int irq)
@@ -486,7 +474,7 @@ static bool probing_irq(int irq)
return desc && desc->action == NULL;
}
-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int __startup_pirq(unsigned int irq)
{
struct evtchn_bind_pirq bind_pirq;
struct irq_info *info = info_for_irq(irq);
@@ -524,9 +512,15 @@ out:
return 0;
}
-static void shutdown_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
+{
+ return __startup_pirq(data->irq);
+}
+
+static void shutdown_pirq(struct irq_data *data)
{
struct evtchn_close close;
+ unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
int evtchn = evtchn_from_irq(irq);
@@ -546,20 +540,20 @@ static void shutdown_pirq(unsigned int irq)
info->evtchn = 0;
}
-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
{
- startup_pirq(irq);
+ startup_pirq(data);
}
-static void disable_pirq(unsigned int irq)
+static void disable_pirq(struct irq_data *data)
{
}
-static void ack_pirq(unsigned int irq)
+static void ack_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
- move_native_irq(irq);
+ move_native_irq(data->irq);
if (VALID_EVTCHN(evtchn)) {
mask_evtchn(evtchn);
@@ -567,23 +561,6 @@ static void ack_pirq(unsigned int irq)
}
}
-static void end_pirq(unsigned int irq)
-{
- int evtchn = evtchn_from_irq(irq);
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (WARN_ON(!desc))
- return;
-
- if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
- (IRQ_DISABLED|IRQ_PENDING)) {
- shutdown_pirq(irq);
- } else if (VALID_EVTCHN(evtchn)) {
- unmask_evtchn(evtchn);
- pirq_unmask_notify(irq);
- }
-}
-
static int find_irq_by_gsi(unsigned gsi)
{
int irq;
@@ -638,14 +615,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
goto out; /* XXX need refcount? */
}
- /* If we are a PV guest, we don't have GSIs (no ACPI passed). Therefore
- * we are using the !xen_initial_domain() to drop in the function.*/
- if (identity_mapped_irq(gsi) || (!xen_initial_domain() &&
- xen_pv_domain())) {
- irq = gsi;
- irq_alloc_desc_at(irq, -1);
- } else
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_gsi(gsi);
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
handle_level_irq, name);
@@ -658,7 +628,7 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name)
* this in the priv domain. */
if (xen_initial_domain() &&
HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
- irq_free_desc(irq);
+ xen_free_irq(irq);
irq = -ENOSPC;
goto out;
}
@@ -674,87 +644,46 @@ out:
}
#ifdef CONFIG_PCI_MSI
-#include <linux/msi.h>
-#include "../pci/msi.h"
-
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc)
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
{
- spin_lock(&irq_mapping_update_lock);
-
- if (alloc & XEN_ALLOC_IRQ) {
- *irq = find_unbound_irq();
- if (*irq == -1)
- goto out;
- }
-
- if (alloc & XEN_ALLOC_PIRQ) {
- *pirq = find_unbound_pirq(MAP_PIRQ_TYPE_MSI);
- if (*pirq == -1)
- goto out;
- }
+ int rc;
+ struct physdev_get_free_pirq op_get_free_pirq;
- set_irq_chip_and_handler_name(*irq, &xen_pirq_chip,
- handle_level_irq, name);
+ op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
- irq_info[*irq] = mk_pirq_info(0, *pirq, 0, 0);
- pirq_to_irq[*pirq] = *irq;
+ WARN_ONCE(rc == -ENOSYS,
+ "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
-out:
- spin_unlock(&irq_mapping_update_lock);
+ return rc ? -1 : op_get_free_pirq.pirq;
}
-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type)
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int vector, const char *name)
{
- int irq = -1;
- struct physdev_map_pirq map_irq;
- int rc;
- int pos;
- u32 table_offset, bir;
-
- memset(&map_irq, 0, sizeof(map_irq));
- map_irq.domid = DOMID_SELF;
- map_irq.type = MAP_PIRQ_TYPE_MSI;
- map_irq.index = -1;
- map_irq.pirq = -1;
- map_irq.bus = dev->bus->number;
- map_irq.devfn = dev->devfn;
-
- if (type == PCI_CAP_ID_MSIX) {
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
-
- pci_read_config_dword(dev, msix_table_offset_reg(pos),
- &table_offset);
- bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
-
- map_irq.table_base = pci_resource_start(dev, bir);
- map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
- }
+ int irq, ret;
spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq();
-
+ irq = xen_allocate_irq_dynamic();
if (irq == -1)
goto out;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
- if (rc) {
- printk(KERN_WARNING "xen map irq failed %d\n", rc);
-
- irq_free_desc(irq);
-
- irq = -1;
- goto out;
- }
- irq_info[irq] = mk_pirq_info(0, map_irq.pirq, 0, map_irq.index);
-
set_irq_chip_and_handler_name(irq, &xen_pirq_chip,
- handle_level_irq,
- (type == PCI_CAP_ID_MSIX) ? "msi-x":"msi");
+ handle_level_irq, name);
+ irq_info[irq] = mk_pirq_info(0, pirq, 0, vector);
+ pirq_to_irq[pirq] = irq;
+ ret = irq_set_msi_desc(irq, msidesc);
+ if (ret < 0)
+ goto error_irq;
out:
spin_unlock(&irq_mapping_update_lock);
return irq;
+error_irq:
+ spin_unlock(&irq_mapping_update_lock);
+ xen_free_irq(irq);
+ return -1;
}
#endif
@@ -779,11 +708,12 @@ int xen_destroy_irq(int irq)
printk(KERN_WARNING "unmap irq failed %d\n", rc);
goto out;
}
- pirq_to_irq[info->u.pirq.pirq] = -1;
}
+ pirq_to_irq[info->u.pirq.pirq] = -1;
+
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
out:
spin_unlock(&irq_mapping_update_lock);
@@ -814,7 +744,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq = evtchn_to_irq[evtchn];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_fasteoi_irq, "event");
@@ -839,7 +769,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
irq = per_cpu(ipi_to_irq, cpu)[ipi];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
if (irq < 0)
goto out;
@@ -875,7 +805,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
irq = per_cpu(virq_to_irq, cpu)[virq];
if (irq == -1) {
- irq = find_unbound_irq();
+ irq = xen_allocate_irq_dynamic();
set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
handle_percpu_irq, "virq");
@@ -934,7 +864,7 @@ static void unbind_from_irq(unsigned int irq)
if (irq_info[irq].type != IRQT_UNBOUND) {
irq_info[irq] = mk_unbound_info();
- irq_free_desc(irq);
+ xen_free_irq(irq);
}
spin_unlock(&irq_mapping_update_lock);
@@ -990,7 +920,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
if (irq < 0)
return irq;
- irqflags |= IRQF_NO_SUSPEND;
+ irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
if (retval != 0) {
unbind_from_irq(irq);
@@ -1234,11 +1164,12 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
return 0;
}
-static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
+ bool force)
{
unsigned tcpu = cpumask_first(dest);
- return rebind_irq_to_cpu(irq, tcpu);
+ return rebind_irq_to_cpu(data->irq, tcpu);
}
int resend_irq_on_evtchn(unsigned int irq)
@@ -1257,35 +1188,35 @@ int resend_irq_on_evtchn(unsigned int irq)
return 1;
}
-static void enable_dynirq(unsigned int irq)
+static void enable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static void disable_dynirq(unsigned int irq)
+static void disable_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
-static void ack_dynirq(unsigned int irq)
+static void ack_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
- move_masked_irq(irq);
+ move_masked_irq(data->irq);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static int retrigger_dynirq(unsigned int irq)
+static int retrigger_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq(data->irq);
struct shared_info *sh = HYPERVISOR_shared_info;
int ret = 0;
@@ -1334,7 +1265,7 @@ static void restore_cpu_pirqs(void)
printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
- startup_pirq(irq);
+ __startup_pirq(irq);
}
}
@@ -1445,7 +1376,6 @@ void xen_poll_irq(int irq)
void xen_irq_resume(void)
{
unsigned int cpu, irq, evtchn;
- struct irq_desc *desc;
init_evtchn_cpu_bindings();
@@ -1465,66 +1395,48 @@ void xen_irq_resume(void)
restore_cpu_ipis(cpu);
}
- /*
- * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
- * are not handled by the IRQ core.
- */
- for_each_irq_desc(irq, desc) {
- if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
- continue;
- if (desc->status & IRQ_DISABLED)
- continue;
-
- evtchn = evtchn_from_irq(irq);
- if (evtchn == -1)
- continue;
-
- unmask_evtchn(evtchn);
- }
-
restore_cpu_pirqs();
}
static struct irq_chip xen_dynamic_chip __read_mostly = {
- .name = "xen-dyn",
+ .name = "xen-dyn",
- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
- .eoi = ack_dynirq,
- .set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
+ .irq_eoi = ack_dynirq,
+ .irq_set_affinity = set_affinity_irq,
+ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_pirq_chip __read_mostly = {
- .name = "xen-pirq",
+ .name = "xen-pirq",
- .startup = startup_pirq,
- .shutdown = shutdown_pirq,
+ .irq_startup = startup_pirq,
+ .irq_shutdown = shutdown_pirq,
- .enable = enable_pirq,
- .unmask = enable_pirq,
+ .irq_enable = enable_pirq,
+ .irq_unmask = enable_pirq,
- .disable = disable_pirq,
- .mask = disable_pirq,
+ .irq_disable = disable_pirq,
+ .irq_mask = disable_pirq,
- .ack = ack_pirq,
- .end = end_pirq,
+ .irq_ack = ack_pirq,
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
- .retrigger = retrigger_dynirq,
+ .irq_retrigger = retrigger_dynirq,
};
static struct irq_chip xen_percpu_chip __read_mostly = {
- .name = "xen-percpu",
+ .name = "xen-percpu",
- .disable = disable_dynirq,
- .mask = disable_dynirq,
- .unmask = enable_dynirq,
+ .irq_disable = disable_dynirq,
+ .irq_mask = disable_dynirq,
+ .irq_unmask = enable_dynirq,
- .ack = ack_dynirq,
+ .irq_ack = ack_dynirq,
};
int xen_set_callback_via(uint64_t via)
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index db8c4c4ac880..ebb292859b59 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -34,32 +34,38 @@ enum shutdown_state {
/* Ignore multiple shutdown requests. */
static enum shutdown_state shutting_down = SHUTDOWN_INVALID;
-#ifdef CONFIG_PM_SLEEP
-static int xen_hvm_suspend(void *data)
-{
- struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
- int *cancelled = data;
-
- BUG_ON(!irqs_disabled());
-
- *cancelled = HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
+struct suspend_info {
+ int cancelled;
+ unsigned long arg; /* extra hypercall argument */
+ void (*pre)(void);
+ void (*post)(int cancelled);
+};
- xen_hvm_post_suspend(*cancelled);
+static void xen_hvm_post_suspend(int cancelled)
+{
+ xen_arch_hvm_post_suspend(cancelled);
gnttab_resume();
+}
- if (!*cancelled) {
- xen_irq_resume();
- xen_console_resume();
- xen_timer_resume();
- }
+static void xen_pre_suspend(void)
+{
+ xen_mm_pin_all();
+ gnttab_suspend();
+ xen_arch_pre_suspend();
+}
- return 0;
+static void xen_post_suspend(int cancelled)
+{
+ xen_arch_post_suspend(cancelled);
+ gnttab_resume();
+ xen_mm_unpin_all();
}
+#ifdef CONFIG_PM_SLEEP
static int xen_suspend(void *data)
{
+ struct suspend_info *si = data;
int err;
- int *cancelled = data;
BUG_ON(!irqs_disabled());
@@ -70,22 +76,20 @@ static int xen_suspend(void *data)
return err;
}
- xen_mm_pin_all();
- gnttab_suspend();
- xen_pre_suspend();
+ if (si->pre)
+ si->pre();
/*
* This hypercall returns 1 if suspend was cancelled
* or the domain was merely checkpointed, and 0 if it
* is resuming in a new domain.
*/
- *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+ si->cancelled = HYPERVISOR_suspend(si->arg);
- xen_post_suspend(*cancelled);
- gnttab_resume();
- xen_mm_unpin_all();
+ if (si->post)
+ si->post(si->cancelled);
- if (!*cancelled) {
+ if (!si->cancelled) {
xen_irq_resume();
xen_console_resume();
xen_timer_resume();
@@ -99,7 +103,7 @@ static int xen_suspend(void *data)
static void do_suspend(void)
{
int err;
- int cancelled = 1;
+ struct suspend_info si;
shutting_down = SHUTDOWN_SUSPEND;
@@ -129,20 +133,29 @@ static void do_suspend(void)
goto out_resume;
}
- if (xen_hvm_domain())
- err = stop_machine(xen_hvm_suspend, &cancelled, cpumask_of(0));
- else
- err = stop_machine(xen_suspend, &cancelled, cpumask_of(0));
+ si.cancelled = 1;
+
+ if (xen_hvm_domain()) {
+ si.arg = 0UL;
+ si.pre = NULL;
+ si.post = &xen_hvm_post_suspend;
+ } else {
+ si.arg = virt_to_mfn(xen_start_info);
+ si.pre = &xen_pre_suspend;
+ si.post = &xen_post_suspend;
+ }
+
+ err = stop_machine(xen_suspend, &si, cpumask_of(0));
dpm_resume_noirq(PMSG_RESUME);
if (err) {
printk(KERN_ERR "failed to start xen_suspend: %d\n", err);
- cancelled = 1;
+ si.cancelled = 1;
}
out_resume:
- if (!cancelled) {
+ if (!si.cancelled) {
xen_arch_resume();
xs_resume();
} else
@@ -162,12 +175,39 @@ out:
}
#endif /* CONFIG_PM_SLEEP */
+struct shutdown_handler {
+ const char *command;
+ void (*cb)(void);
+};
+
+static void do_poweroff(void)
+{
+ shutting_down = SHUTDOWN_POWEROFF;
+ orderly_poweroff(false);
+}
+
+static void do_reboot(void)
+{
+ shutting_down = SHUTDOWN_POWEROFF; /* ? */
+ ctrl_alt_del();
+}
+
static void shutdown_handler(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
char *str;
struct xenbus_transaction xbt;
int err;
+ static struct shutdown_handler handlers[] = {
+ { "poweroff", do_poweroff },
+ { "halt", do_poweroff },
+ { "reboot", do_reboot },
+#ifdef CONFIG_PM_SLEEP
+ { "suspend", do_suspend },
+#endif
+ {NULL, NULL},
+ };
+ static struct shutdown_handler *handler;
if (shutting_down != SHUTDOWN_INVALID)
return;
@@ -184,7 +224,14 @@ static void shutdown_handler(struct xenbus_watch *watch,
return;
}
- xenbus_write(xbt, "control", "shutdown", "");
+ for (handler = &handlers[0]; handler->command; handler++) {
+ if (strcmp(str, handler->command) == 0)
+ break;
+ }
+
+ /* Only acknowledge commands which we are prepared to handle. */
+ if (handler->cb)
+ xenbus_write(xbt, "control", "shutdown", "");
err = xenbus_transaction_end(xbt, 0);
if (err == -EAGAIN) {
@@ -192,17 +239,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
goto again;
}
- if (strcmp(str, "poweroff") == 0 ||
- strcmp(str, "halt") == 0) {
- shutting_down = SHUTDOWN_POWEROFF;
- orderly_poweroff(false);
- } else if (strcmp(str, "reboot") == 0) {
- shutting_down = SHUTDOWN_POWEROFF; /* ? */
- ctrl_alt_del();
-#ifdef CONFIG_PM_SLEEP
- } else if (strcmp(str, "suspend") == 0) {
- do_suspend();
-#endif
+ if (handler->cb) {
+ handler->cb();
} else {
printk(KERN_INFO "Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
@@ -281,27 +319,18 @@ static int shutdown_event(struct notifier_block *notifier,
return NOTIFY_DONE;
}
-static int __init __setup_shutdown_event(void)
-{
- /* Delay initialization in the PV on HVM case */
- if (xen_hvm_domain())
- return 0;
-
- if (!xen_pv_domain())
- return -ENODEV;
-
- return xen_setup_shutdown_event();
-}
-
int xen_setup_shutdown_event(void)
{
static struct notifier_block xenstore_notifier = {
.notifier_call = shutdown_event
};
+
+ if (!xen_domain())
+ return -ENODEV;
register_xenstore_notifier(&xenstore_notifier);
return 0;
}
EXPORT_SYMBOL_GPL(xen_setup_shutdown_event);
-subsys_initcall(__setup_shutdown_event);
+subsys_initcall(xen_setup_shutdown_event);
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index afbe041f42c5..319dd0a94d51 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -156,9 +156,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev,
if (ret)
goto out;
xenbus_probe(NULL);
- ret = xen_setup_shutdown_event();
- if (ret)
- goto out;
return 0;
out:
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf616318..515455296378 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
#include <linux/posix_acl_xattr.h>
#include "xattr.h"
#include "acl.h"
-#include "v9fs_vfs.h"
#include "v9fs.h"
+#include "v9fs_vfs.h"
static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
- posix_acl_release(dacl);
- posix_acl_release(pacl);
} else
retval = -EIO;
+ if (!IS_ERR(dacl))
+ posix_acl_release(dacl);
+
+ if (!IS_ERR(pacl))
+ posix_acl_release(pacl);
+
return retval;
}
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
return -ECHILD;
v9ses = v9fs_inode2v9ses(inode);
- if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+ if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
+ ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
/*
- * On access = client mode get the acl
+ * On access = client and acl = on mode get the acl
* values from the server
*/
return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
struct inode *inode = dentry->d_inode;
set_cached_acl(inode, type, acl);
+
+ if (!acl)
+ return 0;
+
/* Set a setxattr request to server */
size = posix_acl_xattr_size(acl->a_count);
buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
int v9fs_set_create_acl(struct dentry *dentry,
struct posix_acl *dpacl, struct posix_acl *pacl)
{
- if (dpacl)
- v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
- if (pacl)
- v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+ v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+ v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
posix_acl_release(dpacl);
posix_acl_release(pacl);
return 0;
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac2..5b335c5086a1 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
#define CACHETAG_LEN 11
-struct kmem_cache *vcookie_cache;
-
struct fscache_netfs v9fs_cache_netfs = {
.name = "9p",
.version = 0,
};
-static void init_once(void *foo)
-{
- struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- inode_init_once(&vcookie->inode);
-}
-
-/**
- * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
- * vcookie to inode mapping
- *
- * Returns 0 on success.
- */
-
-static int v9fs_init_vcookiecache(void)
-{
- vcookie_cache = kmem_cache_create("vcookie_cache",
- sizeof(struct v9fs_cookie),
- 0, (SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
- init_once);
- if (!vcookie_cache)
- return -ENOMEM;
-
- return 0;
-}
-
-/**
- * v9fs_destroy_vcookiecache - destroy the cache of vcookies
- *
- */
-
-static void v9fs_destroy_vcookiecache(void)
-{
- kmem_cache_destroy(vcookie_cache);
-}
-
-int __v9fs_cache_register(void)
-{
- int ret;
- ret = v9fs_init_vcookiecache();
- if (ret < 0)
- return ret;
-
- return fscache_register_netfs(&v9fs_cache_netfs);
-}
-
-void __v9fs_cache_unregister(void)
-{
- v9fs_destroy_vcookiecache();
- fscache_unregister_netfs(&v9fs_cache_netfs);
-}
-
/**
* v9fs_random_cachetag - Generate a random tag to be associated
* with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
}
const struct fscache_cookie_def v9fs_cache_session_index_def = {
- .name = "9P.session",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
- .get_key = v9fs_cache_session_get_key,
+ .name = "9P.session",
+ .type = FSCACHE_COOKIE_TYPE_INDEX,
+ .get_key = v9fs_cache_session_get_key,
};
void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
void *buffer, uint16_t bufmax)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode,
- vcookie->qid->path);
- return sizeof(vcookie->qid->path);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->path,
+ sizeof(v9inode->fscache_key->path));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
+ v9inode->fscache_key->path);
+ return sizeof(v9inode->fscache_key->path);
}
static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- *size = i_size_read(&vcookie->inode);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ *size = i_size_read(&v9inode->vfs_inode);
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode,
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
*size);
}
static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
void *buffer, uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
- memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version));
-
- P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode,
- vcookie->qid->version);
- return sizeof(vcookie->qid->version);
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
+ memcpy(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version));
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
+ v9inode->fscache_key->version);
+ return sizeof(v9inode->fscache_key->version);
}
static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
const void *buffer,
uint16_t buflen)
{
- const struct v9fs_cookie *vcookie = cookie_netfs_data;
+ const struct v9fs_inode *v9inode = cookie_netfs_data;
- if (buflen != sizeof(vcookie->qid->version))
+ if (buflen != sizeof(v9inode->fscache_key->version))
return FSCACHE_CHECKAUX_OBSOLETE;
- if (memcmp(buffer, &vcookie->qid->version,
- sizeof(vcookie->qid->version)))
+ if (memcmp(buffer, &v9inode->fscache_key->version,
+ sizeof(v9inode->fscache_key->version)))
return FSCACHE_CHECKAUX_OBSOLETE;
return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
{
- struct v9fs_cookie *vcookie = cookie_netfs_data;
+ struct v9fs_inode *v9inode = cookie_netfs_data;
struct pagevec pvec;
pgoff_t first;
int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
first = 0;
for (;;) {
- nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping,
+ nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
first,
PAGEVEC_SIZE - pagevec_count(&pvec));
if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
void v9fs_cache_inode_get_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
if (!S_ISREG(inode->i_mode))
return;
- vcookie = v9fs_inode2cookie(inode);
- if (vcookie->fscache)
+ v9inode = V9FS_I(inode);
+ if (v9inode->fscache)
return;
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
}
void v9fs_cache_inode_put_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 0);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 0);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_flush_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
- vcookie->fscache);
+ v9inode->fscache);
- fscache_relinquish_cookie(vcookie->fscache, 1);
- vcookie->fscache = NULL;
+ fscache_relinquish_cookie(v9inode->fscache, 1);
+ v9inode->fscache = NULL;
}
void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- spin_lock(&vcookie->lock);
+ spin_lock(&v9inode->fscache_lock);
fid = filp->private_data;
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
v9fs_cache_inode_flush_cookie(inode);
else
v9fs_cache_inode_get_cookie(inode);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
void v9fs_cache_inode_reset_cookie(struct inode *inode)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct v9fs_session_info *v9ses;
struct fscache_cookie *old;
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return;
- old = vcookie->fscache;
+ old = v9inode->fscache;
- spin_lock(&vcookie->lock);
- fscache_relinquish_cookie(vcookie->fscache, 1);
+ spin_lock(&v9inode->fscache_lock);
+ fscache_relinquish_cookie(v9inode->fscache, 1);
v9ses = v9fs_inode2v9ses(inode);
- vcookie->fscache = fscache_acquire_cookie(v9ses->fscache,
+ v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
&v9fs_cache_inode_index_def,
- vcookie);
-
+ v9inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
- inode, old, vcookie->fscache);
+ inode, old, v9inode->fscache);
- spin_unlock(&vcookie->lock);
+ spin_unlock(&v9inode->fscache_lock);
}
int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
- return fscache_maybe_release_page(vcookie->fscache, page, gfp);
+ return fscache_maybe_release_page(v9inode->fscache, page, gfp);
}
void __v9fs_fscache_invalidate_page(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
- BUG_ON(!vcookie->fscache);
+ BUG_ON(!v9inode->fscache);
if (PageFsCache(page)) {
- fscache_wait_on_page_write(vcookie->fscache, page);
+ fscache_wait_on_page_write(v9inode->fscache, page);
BUG_ON(!PageLocked(page));
- fscache_uncache_page(vcookie->fscache, page);
+ fscache_uncache_page(v9inode->fscache, page);
}
}
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_page(vcookie->fscache,
+ ret = fscache_read_or_alloc_page(v9inode->fscache,
page,
v9fs_vfs_readpage_complete,
NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
unsigned *nr_pages)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
- if (!vcookie->fscache)
+ if (!v9inode->fscache)
return -ENOBUFS;
- ret = fscache_read_or_alloc_pages(vcookie->fscache,
+ ret = fscache_read_or_alloc_pages(v9inode->fscache,
mapping, pages, nr_pages,
v9fs_vfs_readpage_complete,
NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
{
int ret;
- const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
- ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL);
+ ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
if (ret != 0)
v9fs_uncache_page(inode, page);
}
+
+/*
+ * wait for a page to complete writing to the cache
+ */
+void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
+{
+ const struct v9fs_inode *v9inode = V9FS_I(inode);
+ P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
+ if (PageFsCache(page))
+ fscache_wait_on_page_write(v9inode->fscache, page);
+}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee8..049507a5b01c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
#include <linux/fscache.h>
#include <linux/spinlock.h>
-extern struct kmem_cache *vcookie_cache;
-
-struct v9fs_cookie {
- spinlock_t lock;
- struct inode inode;
- struct fscache_cookie *fscache;
- struct p9_qid *qid;
-};
-
-static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
-{
- return container_of(inode, struct v9fs_cookie, inode);
-}
-
extern struct fscache_netfs v9fs_cache_netfs;
extern const struct fscache_cookie_def v9fs_cache_session_index_def;
extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
struct list_head *pages,
unsigned *nr_pages);
extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
-
-
-/**
- * v9fs_cache_register - Register v9fs file system with the cache
- */
-static inline int v9fs_cache_register(void)
-{
- return __v9fs_cache_register();
-}
-
-/**
- * v9fs_cache_unregister - Unregister v9fs from the cache
- */
-static inline void v9fs_cache_unregister(void)
-{
- __v9fs_cache_unregister();
-}
+extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page);
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- fscache_uncache_page(vcookie->fscache, page);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ fscache_uncache_page(v9inode->fscache, page);
BUG_ON(PageFsCache(page));
}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
+static inline void v9fs_fscache_set_key(struct inode *inode,
struct p9_qid *qid)
{
- struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode);
- spin_lock(&vcookie->lock);
- vcookie->qid = qid;
- spin_unlock(&vcookie->lock);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ spin_lock(&v9inode->fscache_lock);
+ v9inode->fscache_key = qid;
+ spin_unlock(&v9inode->fscache_lock);
}
-#else /* CONFIG_9P_FSCACHE */
-
-static inline int v9fs_cache_register(void)
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
{
- return 1;
+ return __v9fs_fscache_wait_on_page_write(inode, page);
}
-static inline void v9fs_cache_unregister(void) {}
+#else /* CONFIG_9P_FSCACHE */
static inline int v9fs_fscache_release_page(struct page *page,
gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
{}
-static inline void v9fs_vcookie_set_qid(struct inode *inode,
- struct p9_qid *qid)
-{}
+static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
+ struct page *page)
+{
+ return;
+}
#endif /* CONFIG_9P_FSCACHE */
#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d70..cd63e002d826 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
return -ENOMEM;
}
-/**
- * v9fs_fid_lookup - lookup for a fid, try to walk if not found
- * @dentry: dentry to look for fid in
- *
- * Look for a fid in the specified dentry for the current user.
- * If no fid is found, try to create one walking from a fid from the parent
- * dentry (if it has one), or the root dentry. If the user haven't accessed
- * the fs yet, attach now and walk from the root.
- */
-
-struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
+ uid_t uid, int any)
{
- int i, n, l, clone, any, access;
- u32 uid;
- struct p9_fid *fid, *old_fid = NULL;
struct dentry *ds;
- struct v9fs_session_info *v9ses;
char **wnames, *uname;
+ int i, n, l, clone, access;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *old_fid = NULL;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
access = v9ses->flags & V9FS_ACCESS_MASK;
- switch (access) {
- case V9FS_ACCESS_SINGLE:
- case V9FS_ACCESS_USER:
- case V9FS_ACCESS_CLIENT:
- uid = current_fsuid();
- any = 0;
- break;
-
- case V9FS_ACCESS_ANY:
- uid = v9ses->uid;
- any = 1;
- break;
-
- default:
- uid = ~0;
- any = 0;
- break;
- }
-
fid = v9fs_fid_find(dentry, uid, any);
if (fid)
return fid;
@@ -250,6 +221,45 @@ err_out:
return fid;
}
+/**
+ * v9fs_fid_lookup - lookup for a fid, try to walk if not found
+ * @dentry: dentry to look for fid in
+ *
+ * Look for a fid in the specified dentry for the current user.
+ * If no fid is found, try to create one walking from a fid from the parent
+ * dentry (if it has one), or the root dentry. If the user hasn't accessed
+ * the fs yet, attach now and walk from the root.
+ */
+
+struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
+{
+ uid_t uid;
+ int any, access;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ access = v9ses->flags & V9FS_ACCESS_MASK;
+ switch (access) {
+ case V9FS_ACCESS_SINGLE:
+ case V9FS_ACCESS_USER:
+ case V9FS_ACCESS_CLIENT:
+ uid = current_fsuid();
+ any = 0;
+ break;
+
+ case V9FS_ACCESS_ANY:
+ uid = v9ses->uid;
+ any = 1;
+ break;
+
+ default:
+ uid = ~0;
+ any = 0;
+ break;
+ }
+ return v9fs_fid_lookup_with_uid(dentry, uid, any);
+}
+
struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
{
struct p9_fid *fid, *ret;
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
ret = p9_client_walk(fid, 0, NULL, 1);
return ret;
}
+
+static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
+{
+ struct p9_fid *fid, *ret;
+
+ fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
+ if (IS_ERR(fid))
+ return fid;
+
+ ret = p9_client_walk(fid, 0, NULL, 1);
+ return ret;
+}
+
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
+{
+ int err;
+ struct p9_fid *fid;
+
+ fid = v9fs_fid_clone_with_uid(dentry, 0);
+ if (IS_ERR(fid))
+ goto error_out;
+ /*
+ * writeback fid will only be used to write back the
+ * dirty pages. We always request for the open fid in read-write
+ * mode so that a partial page write which results in a page
+ * read can work.
+ */
+ err = p9_client_open(fid, O_RDWR);
+ if (err < 0) {
+ p9_client_clunk(fid);
+ fid = ERR_PTR(err);
+ goto error_out;
+ }
+error_out:
+ return fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996d..bb0b6e7f58fc 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
* Boston, MA 02111-1301 USA
*
*/
-
+#ifndef FS_9P_FID_H
+#define FS_9P_FID_H
#include <linux/list.h>
/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
+struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
+#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba83..c82b017f51f3 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
static LIST_HEAD(v9fs_sessionlist);
+struct kmem_cache *v9fs_inode_cache;
/*
* Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
/* Cache options */
Opt_cache_loose, Opt_fscache,
/* Access options */
- Opt_access,
+ Opt_access, Opt_posixacl,
/* Error token */
Opt_err
};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
{Opt_fscache, "fscache"},
{Opt_cachetag, "cachetag=%s"},
{Opt_access, "access=%s"},
+ {Opt_posixacl, "posixacl"},
{Opt_err, NULL}
};
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
else if (strcmp(s, "any") == 0)
v9ses->flags |= V9FS_ACCESS_ANY;
else if (strcmp(s, "client") == 0) {
-#ifdef CONFIG_9P_FS_POSIX_ACL
v9ses->flags |= V9FS_ACCESS_CLIENT;
-#else
- P9_DPRINTK(P9_DEBUG_ERROR,
- "access=client option not supported\n");
- kfree(s);
- ret = -EINVAL;
- goto free_and_return;
-#endif
} else {
v9ses->flags |= V9FS_ACCESS_SINGLE;
v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
kfree(s);
break;
+ case Opt_posixacl:
+#ifdef CONFIG_9P_FS_POSIX_ACL
+ v9ses->flags |= V9FS_POSIX_ACL;
+#else
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "Not defined CONFIG_9P_FS_POSIX_ACL. "
+ "Ignoring posixacl option\n");
+#endif
+ break;
+
default:
continue;
}
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
list_add(&v9ses->slist, &v9fs_sessionlist);
spin_unlock(&v9fs_sessionlist_lock);
- v9ses->flags = V9FS_ACCESS_USER;
strcpy(v9ses->uname, V9FS_DEFUSER);
strcpy(v9ses->aname, V9FS_DEFANAME);
v9ses->uid = ~0;
v9ses->dfltuid = V9FS_DEFUID;
v9ses->dfltgid = V9FS_DEFGID;
- rc = v9fs_parse_options(v9ses, data);
- if (rc < 0) {
- retval = rc;
- goto error;
- }
-
v9ses->clnt = p9_client_create(dev_name, data);
if (IS_ERR(v9ses->clnt)) {
retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
goto error;
}
- if (p9_is_proto_dotl(v9ses->clnt))
+ v9ses->flags = V9FS_ACCESS_USER;
+
+ if (p9_is_proto_dotl(v9ses->clnt)) {
+ v9ses->flags = V9FS_ACCESS_CLIENT;
v9ses->flags |= V9FS_PROTO_2000L;
- else if (p9_is_proto_dotu(v9ses->clnt))
+ } else if (p9_is_proto_dotu(v9ses->clnt)) {
v9ses->flags |= V9FS_PROTO_2000U;
+ }
+
+ rc = v9fs_parse_options(v9ses, data);
+ if (rc < 0) {
+ retval = rc;
+ goto error;
+ }
v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
v9ses->flags |= V9FS_ACCESS_ANY;
v9ses->uid = ~0;
}
+ if (!v9fs_proto_dotl(v9ses) ||
+ !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+ /*
+ * We support ACL checks on client only if the protocol is
+ * 9P2000.L and access is V9FS_ACCESS_CLIENT.
+ */
+ v9ses->flags &= ~V9FS_ACL_MASK;
+ }
fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
kobject_put(v9fs_kobj);
}
+static void v9fs_inode_init_once(void *foo)
+{
+ struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+#endif
+ inode_init_once(&v9inode->vfs_inode);
+}
+
+/**
+ * v9fs_init_inode_cache - initialize a cache for 9P
+ * Returns 0 on success.
+ */
+static int v9fs_init_inode_cache(void)
+{
+ v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
+ sizeof(struct v9fs_inode),
+ 0, (SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ v9fs_inode_init_once);
+ if (!v9fs_inode_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * v9fs_destroy_inode_cache - destroy the cache of 9P inode
+ *
+ */
+static void v9fs_destroy_inode_cache(void)
+{
+ kmem_cache_destroy(v9fs_inode_cache);
+}
+
+static int v9fs_cache_register(void)
+{
+ int ret;
+ ret = v9fs_init_inode_cache();
+ if (ret < 0)
+ return ret;
+#ifdef CONFIG_9P_FSCACHE
+ return fscache_register_netfs(&v9fs_cache_netfs);
+#else
+ return ret;
+#endif
+}
+
+static void v9fs_cache_unregister(void)
+{
+ v9fs_destroy_inode_cache();
+#ifdef CONFIG_9P_FSCACHE
+ fscache_unregister_netfs(&v9fs_cache_netfs);
+#endif
+}
+
/**
* init_v9fs - Initialize module
*
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0d..bd8496db135b 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_H
+#define FS_9P_V9FS_H
+
#include <linux/backing-dev.h>
/**
@@ -28,8 +31,10 @@
* @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
* @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
* @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
+ * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
* @V9FS_ACCESS_ANY: use a single attach for all users
* @V9FS_ACCESS_MASK: bit mask of different ACCESS options
+ * @V9FS_POSIX_ACL: POSIX ACLs are enforced
*
* Session flags reflect options selected by users at mount time
*/
@@ -37,13 +42,15 @@
V9FS_ACCESS_USER | \
V9FS_ACCESS_CLIENT)
#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+#define V9FS_ACL_MASK V9FS_POSIX_ACL
enum p9_session_flags {
V9FS_PROTO_2000U = 0x01,
V9FS_PROTO_2000L = 0x02,
V9FS_ACCESS_SINGLE = 0x04,
V9FS_ACCESS_USER = 0x08,
- V9FS_ACCESS_CLIENT = 0x10
+ V9FS_ACCESS_CLIENT = 0x10,
+ V9FS_POSIX_ACL = 0x20
};
/* possible values of ->cache */
@@ -109,8 +116,28 @@ struct v9fs_session_info {
struct list_head slist; /* list of sessions registered with v9fs */
struct backing_dev_info bdi;
struct rw_semaphore rename_sem;
+ struct p9_fid *root_fid; /* Used for file system sync */
+};
+
+/* cache_validity flags */
+#define V9FS_INO_INVALID_ATTR 0x01
+
+struct v9fs_inode {
+#ifdef CONFIG_9P_FSCACHE
+ spinlock_t fscache_lock;
+ struct fscache_cookie *fscache;
+ struct p9_qid *fscache_key;
+#endif
+ unsigned int cache_validity;
+ struct p9_fid *writeback_fid;
+ struct inode vfs_inode;
};
+static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
+{
+ return container_of(inode, struct v9fs_inode, vfs_inode);
+}
+
struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
char *);
extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *p);
-extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
-
+extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
extern const struct inode_operations v9fs_dir_inode_operations_dotl;
extern const struct inode_operations v9fs_file_inode_operations_dotl;
extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
-extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses,
- struct p9_fid *fid,
- struct super_block *sb);
+extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
+ struct p9_fid *fid,
+ struct super_block *sb);
/* other default globals */
#define V9FS_PORT 564
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
}
/**
- * v9fs_inode_from_fid - Helper routine to populate an inode by
+ * v9fs_get_inode_from_fid - Helper routine to populate an inode by
* issuing a attribute request
* @v9ses: session information
* @fid: fid to issue attribute request for
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
*
*/
static inline struct inode *
-v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
if (v9fs_proto_dotl(v9ses))
- return v9fs_inode_dotl(v9ses, fid, sb);
+ return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
else
- return v9fs_inode(v9ses, fid, sb);
+ return v9fs_inode_from_fid(v9ses, fid, sb);
}
+#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597ec..4014160903a9 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
* Boston, MA 02111-1301 USA
*
*/
+#ifndef FS_9P_V9FS_VFS_H
+#define FS_9P_V9FS_VFS_H
/* plan9 semantics are that created files are implicitly opened.
* But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
* unlink calls remove, which is an implicit clunk. So we have to track
* that kind of thing so that we don't try to clunk a dead fid.
*/
+#define P9_LOCK_TIMEOUT (30*HZ)
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
extern const struct file_operations v9fs_dir_operations_dotl;
extern const struct dentry_operations v9fs_dentry_operations;
extern const struct dentry_operations v9fs_cached_dentry_operations;
+extern const struct file_operations v9fs_cached_file_operations;
+extern const struct file_operations v9fs_cached_file_operations_dotl;
+extern struct kmem_cache *v9fs_inode_cache;
-#ifdef CONFIG_9P_FSCACHE
struct inode *v9fs_alloc_inode(struct super_block *sb);
void v9fs_destroy_inode(struct inode *inode);
-#endif
-
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode);
void v9fs_evict_inode(struct inode *inode);
ino_t v9fs_qid2ino(struct p9_qid *qid);
void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
int v9fs_file_fsync_dotl(struct file *filp, int datasync);
-
-#define P9_LOCK_TIMEOUT (30*HZ)
+ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
+ const char __user *, size_t, loff_t *, int);
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
+static inline void v9fs_invalidate_inode_attr(struct inode *inode)
+{
+ struct v9fs_inode *v9inode;
+ v9inode = V9FS_I(inode);
+ v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
+ return;
+}
+#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863e..2524e4cbb8ea 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
#include "v9fs.h"
#include "v9fs_vfs.h"
#include "cache.h"
+#include "fid.h"
/**
- * v9fs_vfs_readpage - read an entire page in from 9P
+ * v9fs_fid_readpage - read an entire page in from 9P
*
- * @filp: file being read
+ * @fid: fid being read
* @page: structure to page
*
*/
-
-static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
{
int retval;
loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
buffer = kmap(page);
offset = page_offset(page);
- retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset);
+ retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
if (retval < 0) {
v9fs_uncache_page(inode, page);
goto done;
@@ -87,6 +87,19 @@ done:
}
/**
+ * v9fs_vfs_readpage - read an entire page in from 9P
+ *
+ * @filp: file being read
+ * @page: structure to page
+ *
+ */
+
+static int v9fs_vfs_readpage(struct file *filp, struct page *page)
+{
+ return v9fs_fid_readpage(filp->private_data, page);
+}
+
+/**
* v9fs_vfs_readpages - read a set of pages from 9P
*
* @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
{
if (PagePrivate(page))
return 0;
-
return v9fs_fscache_release_page(page, gfp);
}
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
static void v9fs_invalidate_page(struct page *page, unsigned long offset)
{
+ /*
+ * If called with zero offset, we should release
+ * the private state associated with the page
+ */
if (offset == 0)
v9fs_fscache_invalidate_page(page);
}
+static int v9fs_vfs_writepage_locked(struct page *page)
+{
+ char *buffer;
+ int retval, len;
+ loff_t offset, size;
+ mm_segment_t old_fs;
+ struct v9fs_inode *v9inode;
+ struct inode *inode = page->mapping->host;
+
+ v9inode = V9FS_I(inode);
+ size = i_size_read(inode);
+ if (page->index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+
+ set_page_writeback(page);
+
+ buffer = kmap(page);
+ offset = page_offset(page);
+
+ old_fs = get_fs();
+ set_fs(get_ds());
+ /* We should have writeback_fid always set */
+ BUG_ON(!v9inode->writeback_fid);
+
+ retval = v9fs_file_write_internal(inode,
+ v9inode->writeback_fid,
+ (__force const char __user *)buffer,
+ len, &offset, 0);
+ if (retval > 0)
+ retval = 0;
+
+ set_fs(old_fs);
+ kunmap(page);
+ end_page_writeback(page);
+ return retval;
+}
+
+static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int retval;
+
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval < 0) {
+ if (retval == -EAGAIN) {
+ redirty_page_for_writepage(wbc, page);
+ retval = 0;
+ } else {
+ SetPageError(page);
+ mapping_set_error(page->mapping, retval);
+ }
+ } else
+ retval = 0;
+
+ unlock_page(page);
+ return retval;
+}
+
/**
* v9fs_launder_page - Writeback a dirty page
- * Since the writes go directly to the server, we simply return a 0
- * here to indicate success.
- *
* Returns 0 on success.
*/
static int v9fs_launder_page(struct page *page)
{
+ int retval;
+ struct inode *inode = page->mapping->host;
+
+ v9fs_fscache_wait_on_page_write(inode, page);
+ if (clear_page_dirty_for_io(page)) {
+ retval = v9fs_vfs_writepage_locked(page);
+ if (retval)
+ return retval;
+ }
return 0;
}
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
* with an error.
*
*/
-ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
+static ssize_t
+v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+ loff_t pos, unsigned long nr_segs)
{
+ /*
+ * FIXME
+ * Now that we do caching with cache mode enabled, we need
+ * to support direct IO
+ */
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
"off/no(%lld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
return -EINVAL;
}
+
+static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
+{
+ int retval = 0;
+ struct page *page;
+ struct v9fs_inode *v9inode;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct inode *inode = mapping->host;
+
+ v9inode = V9FS_I(inode);
+start:
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out;
+ }
+ BUG_ON(!v9inode->writeback_fid);
+ if (PageUptodate(page))
+ goto out;
+
+ if (len == PAGE_CACHE_SIZE)
+ goto out;
+
+ retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
+ page_cache_release(page);
+ if (!retval)
+ goto start;
+out:
+ *pagep = page;
+ return retval;
+}
+
+static int v9fs_write_end(struct file *filp, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ loff_t last_pos = pos + copied;
+ struct inode *inode = page->mapping->host;
+
+ if (unlikely(copied < len)) {
+ /*
+ * zero out the rest of the area
+ */
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+ zero_user(page, from + copied, len - copied);
+ flush_dcache_page(page);
+ }
+
+ if (!PageUptodate(page))
+ SetPageUptodate(page);
+ /*
+ * No need to use i_size_read() here, the i_size
+ * cannot change under us because we hold the i_mutex.
+ */
+ if (last_pos > inode->i_size) {
+ inode_add_bytes(inode, last_pos - inode->i_size);
+ i_size_write(inode, last_pos);
+ }
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+
+ return copied;
+}
+
+
const struct address_space_operations v9fs_addr_operations = {
- .readpage = v9fs_vfs_readpage,
- .readpages = v9fs_vfs_readpages,
- .releasepage = v9fs_release_page,
- .invalidatepage = v9fs_invalidate_page,
- .launder_page = v9fs_launder_page,
- .direct_IO = v9fs_direct_IO,
+ .readpage = v9fs_vfs_readpage,
+ .readpages = v9fs_vfs_readpages,
+ .set_page_dirty = __set_page_dirty_nobuffers,
+ .writepage = v9fs_vfs_writepage,
+ .write_begin = v9fs_write_begin,
+ .write_end = v9fs_write_end,
+ .releasepage = v9fs_release_page,
+ .invalidatepage = v9fs_invalidate_page,
+ .launder_page = v9fs_launder_page,
+ .direct_IO = v9fs_direct_IO,
};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5e..b6a3b9f7fe4d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
* v9fs_cached_dentry_delete - called when dentry refcount equals 0
* @dentry: dentry in question
*
- * Only return 1 if our inode is invalid. Only non-synthetic files
- * (ones without mtime == 0) should be calling this function.
- *
*/
-
static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
- struct inode *inode = dentry->d_inode;
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
+ P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
+ dentry->d_name.name, dentry);
- if(!inode)
+ /* Don't cache negative dentries */
+ if (!dentry->d_inode)
return 1;
-
return 0;
}
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
}
}
+static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct p9_fid *fid;
+ struct inode *inode;
+ struct v9fs_inode *v9inode;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ inode = dentry->d_inode;
+ if (!inode)
+ goto out_valid;
+
+ v9inode = V9FS_I(inode);
+ if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
+ int retval;
+ struct v9fs_session_info *v9ses;
+ fid = v9fs_fid_lookup(dentry);
+ if (IS_ERR(fid))
+ return PTR_ERR(fid);
+
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9fs_proto_dotl(v9ses))
+ retval = v9fs_refresh_inode_dotl(fid, inode);
+ else
+ retval = v9fs_refresh_inode(fid, inode);
+ if (retval <= 0)
+ return retval;
+ }
+out_valid:
+ return 1;
+}
+
const struct dentry_operations v9fs_cached_dentry_operations = {
+ .d_revalidate = v9fs_lookup_revalidate,
.d_delete = v9fs_cached_dentry_delete,
.d_release = v9fs_dentry_release,
};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefed..9c2bdda5cd9d 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
P9_DPRINTK(P9_DEBUG_VFS,
"v9fs_dir_release: inode: %p filp: %p fid: %d\n",
inode, filp, fid ? fid->fid : -1);
- filemap_write_and_wait(inode->i_mapping);
if (fid)
p9_client_clunk(fid);
return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c30674396..78bcb97c3425 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
#include "fid.h"
#include "cache.h"
-static const struct file_operations v9fs_cached_file_operations;
-static const struct file_operations v9fs_cached_file_operations_dotl;
+static const struct vm_operations_struct v9fs_file_vm_ops;
/**
* v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
int v9fs_file_open(struct inode *inode, struct file *file)
{
int err;
+ struct v9fs_inode *v9inode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid;
int omode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
+ v9inode = V9FS_I(inode);
v9ses = v9fs_inode2v9ses(inode);
if (v9fs_proto_dotl(v9ses))
omode = file->f_flags;
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file)
}
file->private_data = fid;
- if ((fid->qid.version) && (v9ses->cache)) {
- P9_DPRINTK(P9_DEBUG_VFS, "cached");
- /* enable cached file options */
- if(file->f_op == &v9fs_file_operations)
- file->f_op = &v9fs_cached_file_operations;
- else if (file->f_op == &v9fs_file_operations_dotl)
- file->f_op = &v9fs_cached_file_operations_dotl;
-
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ fid = v9fs_writeback_fid(file->f_path.dentry);
+ if (IS_ERR(fid)) {
+ err = PTR_ERR(fid);
+ goto out_error;
+ }
+ v9inode->writeback_fid = (void *) fid;
+ }
#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
v9fs_cache_inode_set_cookie(inode, file);
#endif
- }
-
return 0;
+out_error:
+ p9_client_clunk(file->private_data);
+ file->private_data = NULL;
+ return err;
}
/**
@@ -335,25 +346,22 @@ out_err:
}
/**
- * v9fs_file_readn - read from a file
- * @filp: file pointer to read
+ * v9fs_fid_readn - read from a fid
+ * @fid: fid to read
* @data: data buffer to read data into
* @udata: user data buffer to read data into
* @count: size of buffer
* @offset: offset at which to read data
*
*/
-
ssize_t
-v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
u64 offset)
{
int n, total, size;
- struct p9_fid *fid = filp->private_data;
P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
- (long long unsigned) offset, count);
-
+ (long long unsigned) offset, count);
n = 0;
total = 0;
size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
}
/**
+ * v9fs_file_readn - read from a file
+ * @filp: file pointer to read
+ * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+ u64 offset)
+{
+ return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
+}
+
+/**
* v9fs_file_read - read from a file
* @filp: file pointer to read
* @udata: user data buffer to read data into
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
return ret;
}
-/**
- * v9fs_file_write - write to a file
- * @filp: file pointer to write
- * @data: data buffer to write data from
- * @count: size of buffer
- * @offset: offset at which to write data
- *
- */
-
-static ssize_t
-v9fs_file_write(struct file *filp, const char __user * data,
- size_t count, loff_t * offset)
+ssize_t
+v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
+ const char __user *data, size_t count,
+ loff_t *offset, int invalidate)
{
- ssize_t retval;
- size_t total = 0;
int n;
- struct p9_fid *fid;
+ loff_t i_size;
+ size_t total = 0;
struct p9_client *clnt;
- struct inode *inode = filp->f_path.dentry->d_inode;
loff_t origin = *offset;
unsigned long pg_start, pg_end;
P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
(int)count, (int)*offset);
- fid = filp->private_data;
clnt = fid->clnt;
-
- retval = generic_write_checks(filp, &origin, &count, 0);
- if (retval)
- goto out;
-
- retval = -EINVAL;
- if ((ssize_t) count < 0)
- goto out;
- retval = 0;
- if (!count)
- goto out;
-
do {
n = p9_client_write(fid, NULL, data+total, origin+total, count);
if (n <= 0)
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data,
total += n;
} while (count > 0);
- if (total > 0) {
+ if (invalidate && (total > 0)) {
pg_start = origin >> PAGE_CACHE_SHIFT;
pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
if (inode->i_mapping && inode->i_mapping->nrpages)
invalidate_inode_pages2_range(inode->i_mapping,
pg_start, pg_end);
*offset += total;
- i_size_write(inode, i_size_read(inode) + total);
- inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ i_size = i_size_read(inode);
+ if (*offset > i_size) {
+ inode_add_bytes(inode, *offset - i_size);
+ i_size_write(inode, *offset);
+ }
}
-
if (n < 0)
- retval = n;
- else
- retval = total;
+ return n;
+
+ return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+ ssize_t retval = 0;
+ loff_t origin = *offset;
+
+
+ retval = generic_write_checks(filp, &origin, &count, 0);
+ if (retval)
+ goto out;
+
+ retval = -EINVAL;
+ if ((ssize_t) count < 0)
+ goto out;
+ retval = 0;
+ if (!count)
+ goto out;
+
+ return v9fs_file_write_internal(filp->f_path.dentry->d_inode,
+ filp->private_data,
+ data, count, offset, 1);
out:
return retval;
}
+
static int v9fs_file_fsync(struct file *filp, int datasync)
{
struct p9_fid *fid;
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
return retval;
}
-static const struct file_operations v9fs_cached_file_operations = {
+static int
+v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int retval;
+
+ retval = generic_file_mmap(file, vma);
+ if (!retval)
+ vma->vm_ops = &v9fs_file_vm_ops;
+
+ return retval;
+}
+
+static int
+v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct v9fs_inode *v9inode;
+ struct page *page = vmf->page;
+ struct file *filp = vma->vm_file;
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+
+ P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
+ page, (unsigned long)filp->private_data);
+
+ v9inode = V9FS_I(inode);
+ /* make sure the cache has finished storing the page */
+ v9fs_fscache_wait_on_page_write(inode, page);
+ BUG_ON(!v9inode->writeback_fid);
+ lock_page(page);
+ if (page->mapping != inode->i_mapping)
+ goto out_unlock;
+
+ return VM_FAULT_LOCKED;
+out_unlock:
+ unlock_page(page);
+ return VM_FAULT_NOPAGE;
+}
+
+static ssize_t
+v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
+ loff_t *offsetp)
+{
+ loff_t size, offset;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+ size = i_size_read(inode);
+ if (offset < size)
+ filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+
+ return v9fs_file_read(filp, udata, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+static ssize_t
+v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
+ loff_t *offset)
+{
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_read(filp, data, count, offset);
+ return do_sync_read(filp, data, count, offset);
+}
+
+static ssize_t
+v9fs_direct_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offsetp)
+{
+ loff_t offset;
+ ssize_t retval;
+ struct inode *inode;
+ struct address_space *mapping;
+
+ offset = *offsetp;
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ if (!count)
+ return 0;
+
+ mutex_lock(&inode->i_mutex);
+ retval = filemap_write_and_wait_range(mapping, offset,
+ offset + count - 1);
+ if (retval)
+ goto err_out;
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that if we fail
+ * here we fall back to buffered write
+ */
+ if (mapping->nrpages) {
+ pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+
+ retval = invalidate_inode_pages2_range(mapping,
+ pg_start, pg_end);
+ /*
+ * If a page can not be invalidated, fall back
+ * to buffered write.
+ */
+ if (retval) {
+ if (retval == -EBUSY)
+ goto buff_write;
+ goto err_out;
+ }
+ }
+ retval = v9fs_file_write(filp, data, count, offsetp);
+err_out:
+ mutex_unlock(&inode->i_mutex);
+ return retval;
+
+buff_write:
+ mutex_unlock(&inode->i_mutex);
+ return do_sync_write(filp, data, count, offsetp);
+}
+
+/**
+ * v9fs_cached_file_write - write to a file
+ * @filp: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+static ssize_t
+v9fs_cached_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t *offset)
+{
+
+ if (filp->f_flags & O_DIRECT)
+ return v9fs_direct_write(filp, data, count, offset);
+ return do_sync_write(filp, data, count, offset);
+}
+
+static const struct vm_operations_struct v9fs_file_vm_ops = {
+ .fault = filemap_fault,
+ .page_mkwrite = v9fs_vm_page_mkwrite,
+};
+
+
+const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync,
};
-static const struct file_operations v9fs_cached_file_operations_dotl = {
+const struct file_operations v9fs_cached_file_operations_dotl = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
+ .read = v9fs_cached_file_read,
+ .write = v9fs_cached_file_write,
.aio_read = generic_file_aio_read,
- .write = v9fs_file_write,
+ .aio_write = generic_file_aio_write,
.open = v9fs_file_open,
.release = v9fs_dir_release,
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
- .mmap = generic_file_readonly_mmap,
+ .mmap = v9fs_file_mmap,
.fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c2..8a2c232f708a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
wstat->extension = NULL;
}
-#ifdef CONFIG_9P_FSCACHE
/**
* v9fs_alloc_inode - helper function to allocate an inode
- * This callback is executed before setting up the inode so that we
- * can associate a vcookie with each inode.
*
*/
-
struct inode *v9fs_alloc_inode(struct super_block *sb)
{
- struct v9fs_cookie *vcookie;
- vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache,
- GFP_KERNEL);
- if (!vcookie)
+ struct v9fs_inode *v9inode;
+ v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
+ GFP_KERNEL);
+ if (!v9inode)
return NULL;
-
- vcookie->fscache = NULL;
- vcookie->qid = NULL;
- spin_lock_init(&vcookie->lock);
- return &vcookie->inode;
+#ifdef CONFIG_9P_FSCACHE
+ v9inode->fscache = NULL;
+ v9inode->fscache_key = NULL;
+ spin_lock_init(&v9inode->fscache_lock);
+#endif
+ v9inode->writeback_fid = NULL;
+ v9inode->cache_validity = 0;
+ return &v9inode->vfs_inode;
}
/**
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
INIT_LIST_HEAD(&inode->i_dentry);
- kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
+ kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
}
void v9fs_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, v9fs_i_callback);
}
-#endif
-/**
- * v9fs_get_inode - helper function to setup an inode
- * @sb: superblock
- * @mode: mode to setup inode with
- *
- */
-
-struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+int v9fs_init_inode(struct v9fs_session_info *v9ses,
+ struct inode *inode, int mode)
{
- int err;
- struct inode *inode;
- struct v9fs_session_info *v9ses = sb->s_fs_info;
-
- P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
-
- inode = new_inode(sb);
- if (!inode) {
- P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
- return ERR_PTR(-ENOMEM);
- }
+ int err = 0;
inode_init_owner(inode, NULL, mode);
inode->i_blocks = 0;
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
case S_IFREG:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- inode->i_fop = &v9fs_file_operations_dotl;
+ if (v9ses->cache)
+ inode->i_fop =
+ &v9fs_cached_file_operations_dotl;
+ else
+ inode->i_fop = &v9fs_file_operations_dotl;
} else {
inode->i_op = &v9fs_file_inode_operations;
- inode->i_fop = &v9fs_file_operations;
+ if (v9ses->cache)
+ inode->i_fop = &v9fs_cached_file_operations;
+ else
+ inode->i_fop = &v9fs_file_operations;
}
break;
-
case S_IFLNK:
if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
err = -EINVAL;
goto error;
}
+error:
+ return err;
- return inode;
+}
-error:
- iput(inode);
- return ERR_PTR(err);
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+ int err;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+ inode = new_inode(sb);
+ if (!inode) {
+ P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ err = v9fs_init_inode(v9ses, inode, mode);
+ if (err) {
+ iput(inode);
+ return ERR_PTR(err);
+ }
+ return inode;
}
/*
@@ -403,6 +416,8 @@ error:
*/
void v9fs_evict_inode(struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+
truncate_inode_pages(inode->i_mapping, 0);
end_writeback(inode);
filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode)
#ifdef CONFIG_9P_FSCACHE
v9fs_cache_inode_put_cookie(inode);
#endif
+ /* clunk the fid stashed in writeback_fid */
+ if (v9inode->writeback_fid) {
+ p9_client_clunk(v9inode->writeback_fid);
+ v9inode->writeback_fid = NULL;
+ }
}
-struct inode *
-v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+static struct inode *v9fs_qid_iget(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_wstat *st)
{
- int err, umode;
- struct inode *ret = NULL;
- struct p9_wstat *st;
-
- st = p9_client_stat(fid);
- if (IS_ERR(st))
- return ERR_CAST(st);
+ int retval, umode;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
umode = p9mode2unixmode(v9ses, st->mode);
- ret = v9fs_get_inode(sb, umode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
+ retval = v9fs_init_inode(v9ses, inode, umode);
+ if (retval)
goto error;
- }
-
- v9fs_stat2inode(st, ret, sb);
- ret->i_ino = v9fs_qid2ino(&st->qid);
+ v9fs_stat2inode(st, inode, sb);
#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
#endif
- p9stat_free(st);
- kfree(st);
- return ret;
+ unlock_new_inode(inode);
+ return inode;
error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
+struct inode *
+v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
+{
+ struct p9_wstat *st;
+ struct inode *inode = NULL;
+
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return ERR_CAST(st);
+
+ inode = v9fs_qid_iget(sb, &st->qid, st);
p9stat_free(st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -458,8 +499,8 @@ error:
static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
{
int retval;
- struct inode *file_inode;
struct p9_fid *v9fid;
+ struct inode *file_inode;
P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
rmdir);
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
return PTR_ERR(v9fid);
retval = p9_client_remove(v9fid);
- if (!retval)
- drop_nlink(file_inode);
+ if (!retval) {
+ /*
+ * directories on unlink should have zero
+ * link count
+ */
+ if (rmdir) {
+ clear_nlink(file_inode);
+ drop_nlink(dir);
+ } else
+ drop_nlink(file_inode);
+
+ v9fs_invalidate_inode_attr(file_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
return retval;
}
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
}
/* instantiate inode and assign the unopened fid to the dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
u32 perm;
int flags;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid;
struct file *filp;
+ struct v9fs_inode *v9inode;
+ struct v9fs_session_info *v9ses;
+ struct p9_fid *fid, *inode_fid;
err = 0;
fid = NULL;
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* if we are opening a file, assign the open fid to the file */
if (nd && nd->flags & LOOKUP_OPEN) {
+ v9inode = V9FS_I(dentry->d_inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
}
filp->private_data = fid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
+#endif
} else
p9_client_clunk(fid);
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
int err;
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
err = 0;
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
fid = NULL;
+ } else {
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
}
if (fid)
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(result);
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
inode = NULL;
@@ -747,17 +825,19 @@ int
v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
+ int retval;
struct inode *old_inode;
+ struct inode *new_inode;
struct v9fs_session_info *v9ses;
struct p9_fid *oldfid;
struct p9_fid *olddirfid;
struct p9_fid *newdirfid;
struct p9_wstat wstat;
- int retval;
P9_DPRINTK(P9_DEBUG_VFS, "\n");
retval = 0;
old_inode = old_dentry->d_inode;
+ new_inode = new_dentry->d_inode;
v9ses = v9fs_inode2v9ses(old_inode);
oldfid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(oldfid))
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = p9_client_wstat(oldfid, &wstat);
clunk_newdir:
- if (!retval)
+ if (!retval) {
+ if (new_inode) {
+ if (S_ISDIR(new_inode->i_mode))
+ clear_nlink(new_inode);
+ else
+ drop_nlink(new_inode);
+ /*
+ * Work around vfs rename rehash bug with
+ * FS_RENAME_DOES_D_MOVE
+ */
+ v9fs_invalidate_inode_attr(new_inode);
+ }
+ if (S_ISDIR(old_inode->i_mode)) {
+ if (!new_inode)
+ inc_nlink(new_dir);
+ drop_nlink(old_dir);
+ }
+ v9fs_invalidate_inode_attr(old_inode);
+ v9fs_invalidate_inode_attr(old_dir);
+ v9fs_invalidate_inode_attr(new_dir);
+
/* successful rename */
d_move(old_dentry, new_dentry);
+ }
up_write(&v9ses->rename_sem);
p9_client_clunk(newdirfid);
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (iattr->ia_valid & ATTR_GID)
wstat.n_gid = iattr->ia_gid;
}
-
- retval = p9_client_wstat(fid, &wstat);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_wstat(fid, &wstat);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
char tag_name[14];
unsigned int i_nlink;
struct v9fs_session_info *v9ses = sb->s_fs_info;
+ struct v9fs_inode *v9inode = V9FS_I(inode);
inode->i_nlink = 1;
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
/* not real number of blocks, but 512 byte ones ... */
inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
/**
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
int mode, const char *extension)
{
u32 perm;
- struct v9fs_session_info *v9ses;
struct p9_fid *fid;
+ struct v9fs_session_info *v9ses;
v9ses = v9fs_inode2v9ses(dir);
if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
if (IS_ERR(fid))
return PTR_ERR(fid);
+ v9fs_invalidate_inode_attr(dir);
p9_client_clunk(fid);
return 0;
}
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int retval;
- struct p9_fid *oldfid;
char *name;
+ struct p9_fid *oldfid;
P9_DPRINTK(P9_DEBUG_VFS,
" %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
sprintf(name, "%d\n", oldfid->fid);
retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
__putname(name);
-
+ if (!retval) {
+ v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+ v9fs_invalidate_inode_attr(dir);
+ }
clunk_fid:
p9_client_clunk(oldfid);
return retval;
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
+int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_wstat *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_stat(fid);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode(st, inode, inode->i_sb);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ p9stat_free(st);
+ kfree(st);
+ return 0;
+}
+
static const struct inode_operations v9fs_dir_inode_operations_dotu = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace4..67c138e94feb 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
return dentry;
}
+static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
+ struct p9_qid *qid,
+ struct p9_fid *fid,
+ struct p9_stat_dotl *st)
+{
+ int retval;
+ unsigned long i_ino;
+ struct inode *inode;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ i_ino = v9fs_qid2ino(qid);
+ inode = iget_locked(sb, i_ino);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+ /*
+ * initialize the inode with the stat info
+ * FIXME!! we may need support for stale inodes
+ * later.
+ */
+ retval = v9fs_init_inode(v9ses, inode, st->st_mode);
+ if (retval)
+ goto error;
+
+ v9fs_stat2inode_dotl(st, inode);
+#ifdef CONFIG_9P_FSCACHE
+ v9fs_fscache_set_key(inode, &st->qid);
+ v9fs_cache_inode_get_cookie(inode);
+#endif
+ retval = v9fs_get_acl(inode, fid);
+ if (retval)
+ goto error;
+
+ unlock_new_inode(inode);
+ return inode;
+error:
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(retval);
+
+}
+
struct inode *
-v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
- struct super_block *sb)
+v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
+ struct super_block *sb)
{
- struct inode *ret = NULL;
- int err;
struct p9_stat_dotl *st;
+ struct inode *inode = NULL;
st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
if (IS_ERR(st))
return ERR_CAST(st);
- ret = v9fs_get_inode(sb, st->st_mode);
- if (IS_ERR(ret)) {
- err = PTR_ERR(ret);
- goto error;
- }
-
- v9fs_stat2inode_dotl(st, ret);
- ret->i_ino = v9fs_qid2ino(&st->qid);
-#ifdef CONFIG_9P_FSCACHE
- v9fs_vcookie_set_qid(ret, &st->qid);
- v9fs_cache_inode_get_cookie(ret);
-#endif
- err = v9fs_get_acl(ret, fid);
- if (err) {
- iput(ret);
- goto error;
- }
- kfree(st);
- return ret;
-error:
+ inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
kfree(st);
- return ERR_PTR(err);
+ return inode;
}
/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct nameidata *nd)
{
int err = 0;
- char *name = NULL;
gid_t gid;
int flags;
mode_t mode;
- struct v9fs_session_info *v9ses;
- struct p9_fid *fid = NULL;
- struct p9_fid *dfid, *ofid;
+ char *name = NULL;
struct file *filp;
struct p9_qid qid;
struct inode *inode;
+ struct p9_fid *fid = NULL;
+ struct v9fs_inode *v9inode;
+ struct p9_fid *dfid, *ofid, *inode_fid;
+ struct v9fs_session_info *v9ses;
struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
err);
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
fid = NULL;
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
+ v9inode = V9FS_I(inode);
+ if (v9ses->cache && !v9inode->writeback_fid) {
+ /*
+ * clone a fid and add it to writeback_fid
+ * we do it during open time instead of
+ * page dirty time via write_begin/page_mkwrite
+ * because we want write after unlink usecase
+ * to work.
+ */
+ inode_fid = v9fs_writeback_fid(dentry);
+ if (IS_ERR(inode_fid)) {
+ err = PTR_ERR(inode_fid);
+ goto error;
+ }
+ v9inode->writeback_fid = (void *) inode_fid;
+ }
/* Since we are opening a file, assign the open fid to the file */
filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
if (IS_ERR(filp)) {
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
return PTR_ERR(filp);
}
filp->private_data = ofid;
+#ifdef CONFIG_9P_FSCACHE
+ if (v9ses->cache)
+ v9fs_cache_inode_set_cookie(inode, filp);
+#endif
return 0;
error:
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
}
/* Now set the ACL based on the default value */
v9fs_set_create_acl(dentry, dacl, pacl);
-
+ inc_nlink(dir);
+ v9fs_invalidate_inode_attr(dir);
error:
if (fid)
p9_client_clunk(fid);
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
err = -EPERM;
v9ses = v9fs_inode2v9ses(dentry->d_inode);
- if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
- return simple_getattr(mnt, dentry, stat);
-
+ if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+ }
fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
if (IS_ERR(fid))
return PTR_ERR(fid);
- retval = p9_client_setattr(fid, &p9attr);
- if (retval < 0)
- return retval;
-
if ((iattr->ia_valid & ATTR_SIZE) &&
iattr->ia_size != i_size_read(dentry->d_inode)) {
retval = vmtruncate(dentry->d_inode, iattr->ia_size);
if (retval)
return retval;
}
+ /* Write all dirty data */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ filemap_write_and_wait(dentry->d_inode->i_mapping);
+
+ retval = p9_client_setattr(fid, &p9attr);
+ if (retval < 0)
+ return retval;
+ v9fs_invalidate_inode_attr(dentry->d_inode);
setattr_copy(dentry->d_inode, iattr);
mark_inode_dirty(dentry->d_inode);
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
void
v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
{
+ struct v9fs_inode *v9inode = V9FS_I(inode);
if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
/* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
* because the inode structure does not have fields for them.
*/
+ v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
}
static int
v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct v9fs_session_info *v9ses;
- struct p9_fid *dfid;
- struct p9_fid *fid = NULL;
- struct inode *inode;
- struct p9_qid qid;
- char *name;
int err;
gid_t gid;
+ char *name;
+ struct p9_qid qid;
+ struct inode *inode;
+ struct p9_fid *dfid;
+ struct p9_fid *fid = NULL;
+ struct v9fs_session_info *v9ses;
name = (char *) dentry->d_name.name;
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
goto error;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache) {
/* Now walk from the parent so we can get an unopened fid. */
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
}
/* instantiate inode and assign the unopened fid to dentry */
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
int err;
- struct p9_fid *dfid, *oldfid;
char *name;
- struct v9fs_session_info *v9ses;
struct dentry *dir_dentry;
+ struct p9_fid *dfid, *oldfid;
+ struct v9fs_session_info *v9ses;
P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
return err;
}
+ v9fs_invalidate_inode_attr(dir);
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
/* Get the latest stat info from server. */
struct p9_fid *fid;
- struct p9_stat_dotl *st;
-
fid = v9fs_fid_lookup(old_dentry);
if (IS_ERR(fid))
return PTR_ERR(fid);
- st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
- if (IS_ERR(st))
- return PTR_ERR(st);
-
- v9fs_stat2inode_dotl(st, old_dentry->d_inode);
-
- kfree(st);
- } else {
- /* Caching disabled. No need to get upto date stat info.
- * This dentry will be released immediately. So, just hold the
- * inode
- */
- ihold(old_dentry->d_inode);
+ v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
}
+ ihold(old_dentry->d_inode);
d_instantiate(dentry, old_dentry->d_inode);
return err;
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
dev_t rdev)
{
int err;
+ gid_t gid;
char *name;
mode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
- gid_t gid;
struct p9_qid qid;
struct dentry *dir_dentry;
struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
if (err < 0)
goto error;
+ v9fs_invalidate_inode_attr(dir);
/* instantiate inode and assign the unopened fid to the dentry */
if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
goto error;
}
- inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +825,31 @@ ndset:
return NULL;
}
+int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
+{
+ loff_t i_size;
+ struct p9_stat_dotl *st;
+ struct v9fs_session_info *v9ses;
+
+ v9ses = v9fs_inode2v9ses(inode);
+ st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+ if (IS_ERR(st))
+ return PTR_ERR(st);
+
+ spin_lock(&inode->i_lock);
+ /*
+ * We don't want to refresh inode->i_size,
+ * because we may have cached data
+ */
+ i_size = inode->i_size;
+ v9fs_stat2inode_dotl(st, inode);
+ if (v9ses->cache)
+ inode->i_size = i_size;
+ spin_unlock(&inode->i_lock);
+ kfree(st);
+ return 0;
+}
+
const struct inode_operations v9fs_dir_inode_operations_dotl = {
.create = v9fs_vfs_create_dotl,
.lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b8131..09fd08d1606f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
} else
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
+ if (v9ses->cache)
+ sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
- sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
- MS_NOATIME;
+ sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
+ if (!v9ses->cache)
+ sb->s_flags |= MS_SYNCHRONOUS;
#ifdef CONFIG_9P_FS_POSIX_ACL
- if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+ if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
sb->s_flags |= MS_POSIXACL;
#endif
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(inode);
goto release_sb;
}
-
root = d_alloc_root(inode);
if (!root) {
iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
retval = PTR_ERR(st);
goto release_sb;
}
-
+ root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
v9fs_stat2inode_dotl(st, root->d_inode);
kfree(st);
} else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
p9stat_free(st);
kfree(st);
}
+ v9fs_fid_add(root, fid);
retval = v9fs_get_acl(inode, fid);
if (retval)
goto release_sb;
- v9fs_fid_add(root, fid);
+ /*
+ * Add the root fid to session info. This is used
+ * for file system sync. We want a cloned fid here
+ * so that we can do a sync_filesystem after a
+ * shrink_dcache_for_umount
+ */
+ v9ses->root_fid = v9fs_fid_clone(root);
+ if (IS_ERR(v9ses->root_fid)) {
+ retval = PTR_ERR(v9ses->root_fid);
+ goto release_sb;
+ }
P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
v9fs_session_close(v9ses);
kfree(v9ses);
return ERR_PTR(retval);
-
release_sb:
/*
- * we will do the session_close and root dentry release
- * in the below call. But we need to clunk fid, because we haven't
- * attached the fid to dentry so it won't get clunked
- * automatically.
+ * we will do the session_close and root dentry
+ * release in the below call.
*/
- p9_client_clunk(fid);
deactivate_locked_super(sb);
return ERR_PTR(retval);
}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
kill_anon_super(s);
-
+ p9_client_clunk(v9ses->root_fid);
v9fs_session_cancel(v9ses);
v9fs_session_close(v9ses);
kfree(v9ses);
@@ -276,11 +285,31 @@ done:
return res;
}
+static int v9fs_sync_fs(struct super_block *sb, int wait)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
+ return p9_client_sync_fs(v9ses->root_fid);
+}
+
+static int v9fs_drop_inode(struct inode *inode)
+{
+ struct v9fs_session_info *v9ses;
+ v9ses = v9fs_inode2v9ses(inode);
+ if (v9ses->cache)
+ return generic_drop_inode(inode);
+ /*
+ * in case of non cached mode always drop the
+ * the inode because we want the inode attribute
+ * to always match that on the server.
+ */
+ return 1;
+}
+
static const struct super_operations v9fs_super_ops = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
.statfs = simple_statfs,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
};
static const struct super_operations v9fs_super_ops_dotl = {
-#ifdef CONFIG_9P_FSCACHE
.alloc_inode = v9fs_alloc_inode,
.destroy_inode = v9fs_destroy_inode,
-#endif
+ .sync_fs = v9fs_sync_fs,
.statfs = v9fs_statfs,
+ .drop_inode = v9fs_drop_inode,
.evict_inode = v9fs_evict_inode,
.show_options = generic_show_options,
.umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
.mount = v9fs_mount,
.kill_sb = v9fs_kill_super,
.owner = THIS_MODULE,
- .fs_flags = FS_RENAME_DOES_D_MOVE,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..7cb53aafac1e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
def_bool n
config EXPORTFS
- tristate
+ bool
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..ba01202844c5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_FHANDLE) += fhandle.o
+
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 15690bb1d3b5..789b3afb3423 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -140,6 +140,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
candidate->first = candidate->last = index;
candidate->offset_first = from;
candidate->to_last = to;
+ INIT_LIST_HEAD(&candidate->link);
candidate->usage = 1;
candidate->state = AFS_WBACK_PENDING;
init_waitqueue_head(&candidate->waitq);
diff --git a/fs/aio.c b/fs/aio.c
index fc557a3be0a9..7f54f43b8f7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = create_workqueue("aio");
+ aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
BUG_ON(!aio_wq || !abe_pool);
@@ -239,15 +239,23 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-#define get_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- atomic_inc(&(kioctx)->users); \
-} while (0)
-#define put_ioctx(kioctx) do { \
- BUG_ON(atomic_read(&(kioctx)->users) <= 0); \
- if (unlikely(atomic_dec_and_test(&(kioctx)->users))) \
- __put_ioctx(kioctx); \
-} while (0)
+static inline void get_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ atomic_inc(&kioctx->users);
+}
+
+static inline int try_get_ioctx(struct kioctx *kioctx)
+{
+ return atomic_inc_not_zero(&kioctx->users);
+}
+
+static inline void put_ioctx(struct kioctx *kioctx)
+{
+ BUG_ON(atomic_read(&kioctx->users) <= 0);
+ if (unlikely(atomic_dec_and_test(&kioctx->users)))
+ __put_ioctx(kioctx);
+}
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
@@ -569,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
- queue_work(aio_wq, &fput_work);
+ schedule_work(&fput_work);
} else {
req->ki_filp = NULL;
really_put_req(ctx, req);
@@ -601,8 +609,13 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
rcu_read_lock();
hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
- if (ctx->user_id == ctx_id && !ctx->dead) {
- get_ioctx(ctx);
+ /*
+ * RCU protects us against accessing freed memory but
+ * we have to be careful not to get a reference when the
+ * reference count already dropped to 0 (ctx->dead test
+ * is unreliable because of races).
+ */
+ if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
ret = ctx;
break;
}
@@ -1629,6 +1642,23 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
+ /*
+ * We could have raced with io_destroy() and are currently holding a
+ * reference to ctx which should be destroyed. We cannot submit IO
+ * since ctx gets freed as soon as io_submit() puts its reference. The
+ * check here is reliable: io_destroy() sets ctx->dead before waiting
+ * for outstanding IO and the barrier between these two is realized by
+ * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
+ * increment ctx->reqs_active before checking for ctx->dead and the
+ * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
+ * don't see ctx->dead set here, io_destroy() waits for our IO to
+ * finish.
+ */
+ if (ctx->dead) {
+ spin_unlock_irq(&ctx->ctx_lock);
+ ret = -EINVAL;
+ goto out_put_req;
+ }
aio_run_iocb(req);
if (!list_empty(&ctx->run_list)) {
/* drain the run list */
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 333a7bb4cb9c..889287019599 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -873,6 +873,11 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj);
if (ret)
goto out_del;
+ /*
+ * bdev could be deleted beneath us which would implicitly destroy
+ * the holder directory. Hold on to it.
+ */
+ kobject_get(bdev->bd_part->holder_dir);
list_add(&holder->list, &bdev->bd_holder_disks);
goto out_unlock;
@@ -909,6 +914,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj);
del_symlink(bdev->bd_part->holder_dir,
&disk_to_dev(disk)->kobj);
+ kobject_put(bdev->bd_part->holder_dir);
list_del_init(&holder->list);
kfree(holder);
}
@@ -922,14 +928,15 @@ EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
* flush_disk - invalidates all buffer-cache entries on a disk
*
* @bdev: struct block device to be flushed
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Invalidates all buffer-cache entries on a disk. It should be called
* when a disk has been changed -- either by a media change or online
* resize.
*/
-static void flush_disk(struct block_device *bdev)
+static void flush_disk(struct block_device *bdev, bool kill_dirty)
{
- if (__invalidate_device(bdev)) {
+ if (__invalidate_device(bdev, kill_dirty)) {
char name[BDEVNAME_SIZE] = "";
if (bdev->bd_disk)
@@ -966,7 +973,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
"%s: detected capacity change from %lld to %lld\n",
name, bdev_size, disk_size);
i_size_write(bdev->bd_inode, disk_size);
- flush_disk(bdev);
+ flush_disk(bdev, false);
}
}
EXPORT_SYMBOL(check_disk_size_change);
@@ -1019,7 +1026,7 @@ int check_disk_change(struct block_device *bdev)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return 0;
- flush_disk(bdev);
+ flush_disk(bdev, true);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
return 1;
@@ -1215,12 +1222,6 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
res = __blkdev_get(bdev, mode, 0);
- /* __blkdev_get() may alter read only status, check it afterwards */
- if (!res && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- __blkdev_put(bdev, mode, 0);
- res = -EACCES;
- }
-
if (whole) {
/* finish claiming */
mutex_lock(&bdev->bd_mutex);
@@ -1298,6 +1299,11 @@ struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
if (err)
return ERR_PTR(err);
+ if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, mode);
+ return ERR_PTR(-EACCES);
+ }
+
return bdev;
}
EXPORT_SYMBOL(blkdev_get_by_path);
@@ -1601,7 +1607,7 @@ fail:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev)
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
{
struct super_block *sb = get_super(bdev);
int res = 0;
@@ -1614,7 +1620,7 @@ int __invalidate_device(struct block_device *bdev)
* hold).
*/
shrink_dcache_sb(sb);
- res = invalidate_inodes(sb);
+ res = invalidate_inodes(sb, kill_dirty);
drop_super(sb);
}
invalidate_bdev(bdev);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3af6052..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
@@ -1254,6 +1263,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2228,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc01439..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved) {
- loops = 0;
+ if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
- } else {
- loops++;
- }
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- time_left = schedule_timeout(pause);
+ time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
@@ -5376,7 +5387,7 @@ again:
num_bytes, data, 1);
goto again;
}
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8076,13 @@ out:
return ret;
}
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 type)
+{
+ u64 alloc_flags = get_alloc_profile(root, type);
+ return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
/*
* helper to account the unused space of all the readonly block group in the
* list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac5192c518..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
*/
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
- unsigned long bits)
+ unsigned long bits, int contig)
{
struct rb_node *node;
struct extent_state *state;
u64 cur_start = *start;
u64 total_bytes = 0;
+ u64 last = 0;
int found = 0;
if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
state = rb_entry(node, struct extent_state, rb_node);
if (state->start > search_end)
break;
- if (state->end >= cur_start && (state->state & bits)) {
+ if (contig && found && state->start > last + 1)
+ break;
+ if (state->end >= cur_start && (state->state & bits) == bits) {
total_bytes += min(search_end, state->end) + 1 -
max(cur_start, state->start);
if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
*start = state->start;
found = 1;
}
+ last = state->end;
+ } else if (contig && found) {
+ break;
}
node = rb_next(node);
if (!node)
@@ -2912,6 +2918,46 @@ out:
return sector;
}
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+ u64 offset,
+ u64 last,
+ get_extent_t *get_extent)
+{
+ u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+ struct extent_map *em;
+ u64 len;
+
+ if (offset >= last)
+ return NULL;
+
+ while(1) {
+ len = last - offset;
+ if (len == 0)
+ break;
+ len = (len + sectorsize - 1) & ~(sectorsize - 1);
+ em = get_extent(inode, NULL, 0, offset, len, 0);
+ if (!em || IS_ERR(em))
+ return em;
+
+ /* if this isn't a hole return it */
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+ em->block_start != EXTENT_MAP_HOLE) {
+ return em;
+ }
+
+ /* this is a hole, advance to the next extent */
+ offset = extent_map_end(em);
+ free_extent_map(em);
+ if (offset >= last)
+ break;
+ }
+ return NULL;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u32 flags = 0;
u32 found_type;
u64 last;
+ u64 last_for_get_extent = 0;
u64 disko = 0;
+ u64 isize = i_size_read(inode);
struct btrfs_key found_key;
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
struct btrfs_file_extent_item *item;
int end = 0;
- u64 em_start = 0, em_len = 0;
+ u64 em_start = 0;
+ u64 em_len = 0;
+ u64 em_end = 0;
unsigned long emflags;
- int hole = 0;
if (len == 0)
return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ /*
+ * lookup the last file extent. We're not using i_size here
+ * because there might be preallocation past i_size
+ */
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
path, inode->i_ino, -1, 0);
if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
found_type = btrfs_key_type(&found_key);
- /* No extents, just return */
+ /* No extents, but there might be delalloc bits */
if (found_key.objectid != inode->i_ino ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_free_path(path);
- return 0;
+ /* have to trust i_size as the end */
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ } else {
+ /*
+ * remember the start of the last extent. There are a
+ * bunch of different factors that go into the length of the
+ * extent, so its much less complex to remember where it started
+ */
+ last = found_key.offset;
+ last_for_get_extent = last + 1;
}
- last = found_key.offset;
btrfs_free_path(path);
+ /*
+ * we might have some extents allocated but more delalloc past those
+ * extents. so, we trust isize unless the start of the last extent is
+ * beyond isize
+ */
+ if (last < isize) {
+ last = (u64)-1;
+ last_for_get_extent = isize;
+ }
+
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
&cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -2973,22 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- hole = 0;
- off = em->start + em->len;
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
- if (em->block_start == EXTENT_MAP_HOLE) {
- hole = 1;
- goto next;
- }
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
- em_start = em->start;
- em_len = em->len;
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
+ em_end = extent_map_end(em);
+ em_len = em_end - em_start;
+ emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -2999,42 +3088,34 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
-next:
- emflags = em->flags;
free_extent_map(em);
em = NULL;
- if (!end) {
- em = get_extent(inode, NULL, 0, off, max - off, 0);
- if (!em)
- goto out;
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- emflags = em->flags;
- }
-
- if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+ if ((em_start >= last) || em_len == (u64)-1 ||
+ (last == (u64)-1 && isize <= em_end)) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
- if (em_start == last) {
+ /* now scan forward to see if this is really the last extent. */
+ em = get_extent_skip_holes(inode, off, last_for_get_extent,
+ get_extent);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+ if (!em) {
flags |= FIEMAP_EXTENT_LAST;
end = 1;
}
-
- if (!hole) {
- ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
- em_len, flags);
- if (ret)
- goto out_free;
- }
+ ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+ em_len, flags);
+ if (ret)
+ goto out_free;
}
out_free:
free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfafd061..9318dfefd59c 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -191,7 +191,7 @@ void extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned long bits);
+ u64 max_bytes, unsigned long bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -763,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -794,15 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
- int c;
- for (c = i - 1; c >= 0; c--) {
- unlock_page(pages[c]);
- page_cache_release(pages[c]);
- }
- return -ENOMEM;
+ faili = i - 1;
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd7832b6d..512c3d1da083 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -90,13 +90,14 @@ static noinline int cow_file_range(struct inode *inode,
unsigned long *nr_written, int unlock);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
err = btrfs_init_acl(trans, inode, dir);
if (!err)
- err = btrfs_xattr_security_init(trans, inode, dir);
+ err = btrfs_xattr_security_init(trans, inode, dir, qstr);
return err;
}
@@ -1913,7 +1914,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
private = 0;
if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY)) {
+ (u64)-1, 1, EXTENT_DIRTY, 0)) {
ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
start, &private_failure);
if (ret == 0) {
@@ -4704,7 +4705,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4765,7 +4766,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4806,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
int err;
int drop_inode = 0;
- if (inode->i_nlink == 0)
- return -ENOENT;
-
/* do not allow sys_link's with other subvols of the same device */
if (root->objectid != BTRFS_I(inode)->root->objectid)
return -EPERM;
@@ -4821,10 +4819,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
/*
- * 1 item for inode ref
+ * 2 items for inode and inode ref
* 2 items for dir items
+ * 1 item for parent inode
*/
- trans = btrfs_start_transaction(root, 3);
+ trans = btrfs_start_transaction(root, 5);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto fail;
@@ -4893,7 +4892,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
drop_on_err = 1;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
goto out_fail;
@@ -5280,6 +5279,128 @@ out:
return em;
}
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+ size_t pg_offset, u64 start, u64 len,
+ int create)
+{
+ struct extent_map *em;
+ struct extent_map *hole_em = NULL;
+ u64 range_start = start;
+ u64 end;
+ u64 found;
+ u64 found_end;
+ int err = 0;
+ /*
+ * Extent lookup for fiemap (btrfs_fiemap() passes this function to
+ * extent_fiemap()).  It starts from btrfs_get_extent() and, when
+ * that reports a hole or nothing at all, looks at the
+ * EXTENT_DELALLOC bits in the inode's io_tree so that delalloc
+ * ranges are reported as EXTENT_MAP_DELALLOC instead.
+ *
+ * Returns the extent map to report, NULL if there is none, or an
+ * ERR_PTR(): -ENOMEM from here, or whatever btrfs_get_extent()
+ * returned.
+ */
+ em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+ if (IS_ERR(em))
+ return em;
+ if (em) {
+ /*
+ * if our em maps to a hole, there might
+ * actually be delalloc bytes behind it
+ */
+ if (em->block_start != EXTENT_MAP_HOLE)
+ return em;
+ else
+ hole_em = em;
+ }
+
+ /* check to see if we've wrapped (len == -1 or similar) */
+ end = start + len;
+ if (end < start)
+ end = (u64)-1;
+ else
+ end -= 1; /* end is inclusive */
+
+ em = NULL;
+
+ /*
+ * ok, we didn't find anything, let's look for delalloc.
+ * range_start is handed over by address and read back below as
+ * the start of the delalloc that was found -- NOTE(review):
+ * presumably count_range_bits() moves it there; confirm.
+ */
+ found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+ end, len, EXTENT_DELALLOC, 1);
+ found_end = range_start + found;
+ if (found_end < range_start)
+ found_end = (u64)-1;
+
+ /*
+ * we didn't find anything useful, return
+ * the original results from get_extent()
+ */
+ if (range_start > end || found_end <= start) {
+ em = hole_em;
+ hole_em = NULL; /* cleared so the cleanup at out: does not free the map we return */
+ goto out;
+ }
+
+ /* adjust the range_start to make sure it doesn't
+ * go backwards from the start they passed in
+ */
+ range_start = max(start,range_start);
+ found = found_end - range_start;
+
+ if (found > 0) {
+ u64 hole_start = start;
+ u64 hole_len = len;
+
+ em = alloc_extent_map(GFP_NOFS);
+ if (!em) {
+ err = -ENOMEM;
+ goto out;
+ }
+ /*
+ * when btrfs_get_extent can't find anything it
+ * returns one huge hole
+ *
+ * make sure what it found really fits our range, and
+ * adjust to make sure it is based on the start from
+ * the caller
+ */
+ if (hole_em) {
+ u64 calc_end = extent_map_end(hole_em);
+
+ if (calc_end <= start || (hole_em->start > end)) {
+ free_extent_map(hole_em);
+ hole_em = NULL;
+ } else {
+ hole_start = max(hole_em->start, start);
+ hole_len = calc_end - hole_start;
+ }
+ }
+ em->bdev = NULL;
+ if (hole_em && range_start > hole_start) {
+ /* our hole starts before our delalloc, so we
+ * have to return just the parts of the hole
+ * that go until the delalloc starts
+ */
+ em->len = min(hole_len,
+ range_start - hole_start);
+ em->start = hole_start;
+ em->orig_start = hole_start;
+ /*
+ * don't adjust block start at all,
+ * it is fixed at EXTENT_MAP_HOLE
+ */
+ em->block_start = hole_em->block_start;
+ em->block_len = hole_len;
+ } else {
+ em->start = range_start; /* report the delalloc range itself */
+ em->len = found;
+ em->orig_start = range_start;
+ em->block_start = EXTENT_MAP_DELALLOC;
+ em->block_len = found;
+ }
+ } else if (hole_em) {
+ return hole_em; /* nothing left after clamping to start: return the hole as found */
+ }
+out:
+
+ free_extent_map(hole_em);
+ if (err) {
+ free_extent_map(em);
+ return ERR_PTR(err);
+ }
+ return em;
+}
+
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
@@ -5934,6 +6055,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
if (!skip_sum) {
dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
if (!dip->csums) {
+ kfree(dip);
ret = -ENOMEM;
goto free_ordered;
}
@@ -6102,7 +6224,7 @@ out:
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+ return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
int btrfs_readpage(struct file *file, struct page *page)
@@ -6982,7 +7104,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(trans, inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err) {
drop_inode = 1;
goto out_unlock;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6aaa5e..5fdb2abc4fa7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
- if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+ if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
return -EINVAL;
if (flags & ~BTRFS_SUBVOL_RDONLY)
return -EOPNOTSUPP;
+ if (!is_owner_or_cap(inode))
+ return -EACCES;
+
down_write(&root->fs_info->subvol_sem);
/* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
- ret = btrfs_update_root(trans, root,
+ ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
btrfs_commit_transaction(trans, root);
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450399df..a178f5ebea78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
unsigned long tot_out;
unsigned long tot_len;
char *buf;
+ bool may_late_unmap, need_unmap;
data_in = kmap(pages_in[0]);
tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in += in_len;
working_bytes = in_len;
+ may_late_unmap = need_unmap = false;
/* fast path: avoid using the working buffer */
if (in_page_bytes_left >= in_len) {
buf = data_in + in_offset;
bytes = in_len;
+ may_late_unmap = true;
goto cont;
}
@@ -329,14 +332,17 @@ cont:
if (working_bytes == 0 && tot_in >= tot_len)
break;
- kunmap(pages_in[page_in_index]);
- page_in_index++;
- if (page_in_index >= total_pages_in) {
+ if (page_in_index + 1 >= total_pages_in) {
ret = -1;
- data_in = NULL;
goto done;
}
- data_in = kmap(pages_in[page_in_index]);
+
+ if (may_late_unmap)
+ need_unmap = true;
+ else
+ kunmap(pages_in[page_in_index]);
+
+ data_in = kmap(pages_in[++page_in_index]);
in_page_bytes_left = PAGE_CACHE_SIZE;
in_offset = 0;
@@ -346,6 +352,8 @@ cont:
out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
+ if (need_unmap)
+ kunmap(pages_in[page_in_index - 1]);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "btrfs decompress failed\n");
ret = -1;
@@ -363,8 +371,7 @@ cont:
break;
}
done:
- if (data_in)
- kunmap(pages_in[page_in_index]);
+ kunmap(pages_in[page_in_index]);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4ed9447..31ade5802ae8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
u32 item_size;
int ret;
int err = 0;
+ int progress = 0;
path = btrfs_alloc_path();
if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
while (1) {
+ progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
BUG_ON(IS_ERR(trans));
-
+restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
}
}
}
+ if (trans && progress && err == -ENOSPC) {
+ ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+ rc->block_group->flags);
+ if (ret == 0) {
+ err = 0;
+ progress = 0;
+ goto restart;
+ }
+ }
btrfs_release_path(rc->extent_root, path);
clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008f7d28..d39a9895d932 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -155,7 +155,8 @@ enum {
Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
- Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+ Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+ Opt_enospc_debug, Opt_err,
};
static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
{Opt_space_cache, "space_cache"},
{Opt_clear_cache, "clear_cache"},
{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+ {Opt_enospc_debug, "enospc_debug"},
{Opt_err, NULL},
};
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_user_subvol_rm_allowed:
btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
break;
+ case Opt_enospc_debug:
+ btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca15276..dd13eb81ee40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = btrfs_shrink_device(device, 0);
if (ret)
- goto error_brelse;
+ goto error_undo;
ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
if (ret)
- goto error_brelse;
+ goto error_undo;
device->in_fs_metadata = 0;
@@ -1416,6 +1416,13 @@ out:
mutex_unlock(&root->fs_info->volume_mutex);
mutex_unlock(&uuid_mutex);
return ret;
+error_undo:
+ if (device->writeable) {
+ list_add(&device->dev_alloc_list,
+ &root->fs_info->fs_devices->alloc_list);
+ root->fs_info->fs_devices->rw_devices++;
+ }
+ goto error_brelse;
}
/*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->dev_root = root->fs_info->dev_root;
device->bdev = bdev;
device->in_fs_metadata = 1;
- device->mode = 0;
+ device->mode = FMODE_EXCL;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2b..d779cefcfd7d 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
}
int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir)
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
@@ -378,7 +379,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
char *suffix;
char *name;
- err = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bbb..b3cc8039134b 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
- struct inode *inode, struct inode *dir);
+ struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
#endif /* __XATTR__ */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 42c7fafc8bfe..a0358c2189cb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
bool preemptive)
{
struct dentry *grave, *trap;
+ struct path path, path_to_graveyard;
char nbuffer[8 + 8 + 1];
int ret;
@@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
/* non-directories can just be unlinked */
if (!S_ISDIR(rep->d_inode->i_mode)) {
_debug("unlink stale object");
- ret = vfs_unlink(dir->d_inode, rep);
- if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_unlink(&path, rep);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Unlink security error");
+ } else {
+ ret = vfs_unlink(dir->d_inode, rep);
+
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+ }
mutex_unlock(&dir->d_inode->i_mutex);
@@ -379,12 +388,23 @@ try_again:
}
/* attempt the rename */
- ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
- if (ret != 0 && ret != -ENOMEM)
- cachefiles_io_error(cache, "Rename failed with error %d", ret);
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ path_to_graveyard.mnt = cache->mnt;
+ path_to_graveyard.dentry = cache->graveyard;
+ ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
+ if (ret < 0) {
+ cachefiles_io_error(cache, "Rename security error %d", ret);
+ } else {
+ ret = vfs_rename(dir->d_inode, rep,
+ cache->graveyard->d_inode, grave);
+ if (ret != 0 && ret != -ENOMEM)
+ cachefiles_io_error(cache,
+ "Rename failed with error %d", ret);
- if (preemptive)
- cachefiles_mark_object_buried(cache, rep);
+ if (preemptive)
+ cachefiles_mark_object_buried(cache, rep);
+ }
unlock_rename(cache->graveyard, dir);
dput(grave);
@@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
{
struct cachefiles_cache *cache;
struct dentry *dir, *next = NULL;
+ struct path path;
unsigned long start;
const char *name;
int ret, nlen;
@@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
cache = container_of(parent->fscache.cache,
struct cachefiles_cache, cache);
+ path.mnt = cache->mnt;
ASSERT(parent->dentry);
ASSERT(parent->dentry->d_inode);
@@ -511,6 +533,10 @@ lookup_again:
if (ret < 0)
goto create_error;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, next, 0);
+ if (ret < 0)
+ goto create_error;
start = jiffies;
ret = vfs_mkdir(dir->d_inode, next, 0);
cachefiles_hist(cachefiles_mkdir_histogram, start);
@@ -536,6 +562,10 @@ lookup_again:
if (ret < 0)
goto create_error;
+ path.dentry = dir;
+ ret = security_path_mknod(&path, next, S_IFREG, 0);
+ if (ret < 0)
+ goto create_error;
start = jiffies;
ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
cachefiles_hist(cachefiles_create_histogram, start);
@@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
{
struct dentry *subdir;
unsigned long start;
+ struct path path;
int ret;
_enter(",,%s", dirname);
@@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_debug("attempt mkdir");
+ path.mnt = cache->mnt;
+ path.dentry = dir;
+ ret = security_path_mkdir(&path, subdir, 0700);
+ if (ret < 0)
+ goto mkdir_error;
ret = vfs_mkdir(dir->d_inode, subdir, 0700);
if (ret < 0)
goto mkdir_error;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 0bc68de8edd7..ebafa65a29b6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -409,7 +409,7 @@ more:
spin_lock(&inode->i_lock);
if (ci->i_release_count == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = filp->f_pos;
}
spin_unlock(&inode->i_lock);
@@ -496,6 +496,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
+ ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
struct inode *inode = ceph_get_snapdir(parent);
@@ -992,7 +993,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct inode *dir;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
dir = dentry->d_parent->d_inode;
@@ -1029,28 +1030,8 @@ out_touch:
static void ceph_dentry_release(struct dentry *dentry)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *parent_inode = NULL;
- u64 snapid = CEPH_NOSNAP;
- if (!IS_ROOT(dentry)) {
- parent_inode = dentry->d_parent->d_inode;
- if (parent_inode)
- snapid = ceph_snap(parent_inode);
- }
- dout("dentry_release %p parent %p\n", dentry, parent_inode);
- if (parent_inode && snapid != CEPH_SNAPDIR) {
- struct ceph_inode_info *ci = ceph_inode(parent_inode);
-
- spin_lock(&parent_inode->i_lock);
- if (ci->i_shared_gen == di->lease_shared_gen ||
- snapid <= CEPH_MAXSNAP) {
- dout(" clearing %p complete (d_release)\n",
- parent_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- ci->i_release_count++;
- }
- spin_unlock(&parent_inode->i_lock);
- }
+ dout("dentry_release %p\n", dentry);
if (di) {
ceph_dentry_lru_del(dentry);
if (di->lease_session)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..193bfa5e9cbd 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -707,7 +707,7 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
(ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
dout(" marking %p complete (empty)\n", inode);
- ci->i_ceph_flags |= CEPH_I_COMPLETE;
+ /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
ci->i_max_offset = 2;
}
break;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 39c243acd062..f40b9139e437 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -584,10 +584,14 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (lastinode)
iput(lastinode);
- dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino);
- list_for_each_entry(child, &realm->children, child_item)
- queue_realm_cap_snaps(child);
+ list_for_each_entry(child, &realm->children, child_item) {
+ dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
+ realm, realm->ino, child, child->ino);
+ list_del_init(&child->dirty_item);
+ list_add(&child->dirty_item, &realm->dirty_item);
+ }
+ list_del_init(&realm->dirty_item);
dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
}
@@ -683,7 +687,9 @@ more:
* queue cap snaps _after_ we've built the new snap contexts,
* so that i_head_snapc can be set appropriately.
*/
- list_for_each_entry(realm, &dirty_realms, dirty_item) {
+ while (!list_empty(&dirty_realms)) {
+ realm = list_first_entry(&dirty_realms, struct ceph_snap_realm,
+ dirty_item);
queue_realm_cap_snaps(realm);
}
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 4a3330235d55..a9371b6578c0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -127,5 +127,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.70"
+#define CIFS_VERSION "1.71"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8d9189f64477..79f641eeda30 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -170,7 +170,7 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
{
int rc, alen, slen;
const char *pct;
- char *endp, scope_id[13];
+ char scope_id[13];
struct sockaddr_in *s4 = (struct sockaddr_in *) dst;
struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst;
@@ -197,9 +197,9 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len)
memcpy(scope_id, pct + 1, slen);
scope_id[slen] = '\0';
- s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0);
- if (endp != scope_id + slen)
- return 0;
+ rc = strict_strtoul(scope_id, 0,
+ (unsigned long *)&s6->sin6_scope_id);
+ rc = (rc == 0) ? 1 : 0;
}
return rc;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 1adc9625a344..16765703131b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -656,13 +656,13 @@ ssetup_ntlmssp_authenticate:
if (type == LANMAN) {
#ifdef CONFIG_CIFS_WEAK_PW_HASH
- char lnm_session_key[CIFS_SESS_KEY_SIZE];
+ char lnm_session_key[CIFS_AUTH_RESP_SIZE];
pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
/* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
+ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
/* Calculate hash with password and copy into bcc_ptr.
* Encryption Key (stored as in cryptkey) gets used if the
@@ -675,8 +675,8 @@ ssetup_ntlmssp_authenticate:
true : false, lnm_session_key);
ses->flags |= CIFS_SES_LANMAN;
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_SESS_KEY_SIZE);
- bcc_ptr += CIFS_SESS_KEY_SIZE;
+ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
/* can not sign if LANMAN negotiated so no need
to calculate signing key? but what if server
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..c6d31a3bab88 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
*/
asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs(buf, &tmp);
- path_put(&path);
- }
+ struct kstatfs tmp;
+ int error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
{
- struct file * file;
struct kstatfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ int error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct path path;
+ struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct kstatfs tmp;
- error = vfs_statfs(&path, &tmp);
- if (!error)
- error = put_compat_statfs64(buf, &tmp);
- path_put(&path);
- }
+ error = user_statfs(pathname, &tmp);
+ if (!error)
+ error = put_compat_statfs64(buf, &tmp);
return error;
}
asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
{
- struct file * file;
struct kstatfs tmp;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = vfs_statfs(&file->f_path, &tmp);
+ error = fd_statfs(fd, &tmp);
if (!error)
error = put_compat_statfs64(buf, &tmp);
- fput(file);
-out:
return error;
}
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PREAD)
+ ret = compat_readv(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &pos);
+ ret = -ESPIPE;
+ if (file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(file, vec, vlen, &pos);
fput_light(file, fput_needed);
return ret;
}
@@ -2308,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
}
#endif /* CONFIG_TIMERFD */
+
+#ifdef CONFIG_FHANDLE
+/*
+ * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
+ * doesn't set the O_LARGEFILE flag.
+ *
+ * The three arguments are handed to do_handle_open() unchanged and
+ * its return value is returned as is.
+ */
+asmlinkage long
+compat_sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle, int flags)
+{
+ return do_handle_open(mountdirfd, handle, flags);
+}
+#endif /* CONFIG_FHANDLE */
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..a39fe47c466f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
+ /*
+ * Inform try_to_ascend() that we are no longer attached to the
+ * dentry tree
+ */
+ dentry->d_flags |= DCACHE_DISCONNECTED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
+ * This tries to ascend one level of parenthood, but
+ * we can race with renaming, so we need to re-check
+ * the parenthood after dropping the lock and check
+ * that the sequence number still matches.
+ *
+ * @old:    dentry to ascend from; its d_lock is dropped here, so the
+ *          caller must hold it on entry
+ * @locked: when non-zero the rename_lock sequence check is skipped
+ *          (NOTE(review): presumably the caller then holds
+ *          rename_lock itself -- confirm against the callers)
+ * @seq:    rename_lock sequence value to validate against
+ *
+ * Returns old's parent with the parent's d_lock held, or NULL with
+ * neither lock held when the ascent raced with a rename or deletion;
+ * the callers restart their walk on NULL.
+ */
+static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+{
+ struct dentry *new = old->d_parent;
+
+ rcu_read_lock();
+ spin_unlock(&old->d_lock);
+ spin_lock(&new->d_lock);
+
+ /*
+ * might go back up the wrong parent if we have had a rename
+ * or deletion
+ */
+ if (new != old->d_parent ||
+ (old->d_flags & DCACHE_DISCONNECTED) || /* flag is set by d_kill() once old has left the tree */
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&new->d_lock);
+ new = NULL;
+ }
+ rcu_read_unlock();
+ return new;
+}
+
+
+/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1181,24 +1200,10 @@ resume:
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ struct dentry *child = this_parent;
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
+static struct dentry * __d_find_any_alias(struct inode *inode)
+{
+ struct dentry *alias;
+ /*
+ * Return the first dentry on inode->i_dentry, whatever its state,
+ * after taking a reference on it with __dget(); NULL if the inode
+ * has no aliases.  The callers in this file invoke it with
+ * inode->i_lock held.
+ */
+ if (list_empty(&inode->i_dentry))
+ return NULL;
+ alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+ __dget(alias);
+ return alias;
+}
+
+static struct dentry * d_find_any_alias(struct inode *inode)
+{
+ struct dentry *de;
+ /*
+ * Locked wrapper: takes inode->i_lock around
+ * __d_find_any_alias() and returns its result (a referenced
+ * dentry, or NULL when the inode has no aliases).
+ */
+ spin_lock(&inode->i_lock);
+ de = __d_find_any_alias(inode);
+ spin_unlock(&inode->i_lock);
+ return de;
+}
+
+
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
if (IS_ERR(inode))
return ERR_CAST(inode);
- res = d_find_alias(inode);
+ res = d_find_any_alias(inode);
if (res)
goto out_iput;
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&inode->i_lock);
- res = __d_find_alias(inode, 0);
+ res = __d_find_any_alias(inode);
if (res) {
spin_unlock(&inode->i_lock);
dput(tmp);
@@ -2920,28 +2947,14 @@ resume:
spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- struct dentry *tmp;
- struct dentry *child;
-
- tmp = this_parent->d_parent;
+ struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- rcu_read_lock();
- spin_unlock(&this_parent->d_lock);
- child = this_parent;
- this_parent = tmp;
- spin_lock(&this_parent->d_lock);
- /* might go back up the wrong parent if we have had a rename
- * or deletion */
- if (this_parent != child->d_parent ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&this_parent->d_lock);
- rcu_read_unlock();
+ this_parent = try_to_ascend(this_parent, locked, seq);
+ if (!this_parent)
goto rename_retry;
- }
- rcu_read_unlock();
next = child->d_u.d_child.next;
goto resume;
}
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 6fc4f319b550..534c1d46e69e 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -46,24 +46,28 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *lower_dentry;
struct vfsmount *lower_mnt;
- struct dentry *dentry_save;
- struct vfsmount *vfsmount_save;
+ struct dentry *dentry_save = NULL;
+ struct vfsmount *vfsmount_save = NULL;
int rc = 1;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
goto out;
- dentry_save = nd->path.dentry;
- vfsmount_save = nd->path.mnt;
- nd->path.dentry = lower_dentry;
- nd->path.mnt = lower_mnt;
+ if (nd) {
+ dentry_save = nd->path.dentry;
+ vfsmount_save = nd->path.mnt;
+ nd->path.dentry = lower_dentry;
+ nd->path.mnt = lower_mnt;
+ }
rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
- nd->path.dentry = dentry_save;
- nd->path.mnt = vfsmount_save;
+ if (nd) {
+ nd->path.dentry = dentry_save;
+ nd->path.mnt = vfsmount_save;
+ }
if (dentry->d_inode) {
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index dbc84ed96336..e00753496e3e 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -632,8 +632,7 @@ int ecryptfs_interpose(struct dentry *hidden_dentry,
u32 flags);
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct inode *ecryptfs_dir_inode,
- struct nameidata *ecryptfs_nd);
+ struct inode *ecryptfs_dir_inode);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
size_t *decrypted_name_size,
struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 81e10e6a9443..7d1050e254f9 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -317,6 +317,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
const struct file_operations ecryptfs_dir_fops = {
.readdir = ecryptfs_readdir,
+ .read = generic_read_dir,
.unlocked_ioctl = ecryptfs_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index bd33f87a1907..b592938a84bc 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -74,16 +74,20 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
unsigned int flags_save;
int rc;
- dentry_save = nd->path.dentry;
- vfsmount_save = nd->path.mnt;
- flags_save = nd->flags;
- nd->path.dentry = lower_dentry;
- nd->path.mnt = lower_mnt;
- nd->flags &= ~LOOKUP_OPEN;
+ if (nd) {
+ dentry_save = nd->path.dentry;
+ vfsmount_save = nd->path.mnt;
+ flags_save = nd->flags;
+ nd->path.dentry = lower_dentry;
+ nd->path.mnt = lower_mnt;
+ nd->flags &= ~LOOKUP_OPEN;
+ }
rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
- nd->path.dentry = dentry_save;
- nd->path.mnt = vfsmount_save;
- nd->flags = flags_save;
+ if (nd) {
+ nd->path.dentry = dentry_save;
+ nd->path.mnt = vfsmount_save;
+ nd->flags = flags_save;
+ }
return rc;
}
@@ -241,8 +245,7 @@ out:
*/
int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
struct dentry *lower_dentry,
- struct inode *ecryptfs_dir_inode,
- struct nameidata *ecryptfs_nd)
+ struct inode *ecryptfs_dir_inode)
{
struct dentry *lower_dir_dentry;
struct vfsmount *lower_mnt;
@@ -290,8 +293,6 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
goto out;
if (special_file(lower_inode->i_mode))
goto out;
- if (!ecryptfs_nd)
- goto out;
/* Released in this function */
page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER);
if (!page_virt) {
@@ -349,75 +350,6 @@ out:
}
/**
- * ecryptfs_new_lower_dentry
- * @name: The name of the new dentry.
- * @lower_dir_dentry: Parent directory of the new dentry.
- * @nd: nameidata from last lookup.
- *
- * Create a new dentry or get it from lower parent dir.
- */
-static struct dentry *
-ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
- struct nameidata *nd)
-{
- struct dentry *new_dentry;
- struct dentry *tmp;
- struct inode *lower_dir_inode;
-
- lower_dir_inode = lower_dir_dentry->d_inode;
-
- tmp = d_alloc(lower_dir_dentry, name);
- if (!tmp)
- return ERR_PTR(-ENOMEM);
-
- mutex_lock(&lower_dir_inode->i_mutex);
- new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
- mutex_unlock(&lower_dir_inode->i_mutex);
-
- if (!new_dentry)
- new_dentry = tmp;
- else
- dput(tmp);
-
- return new_dentry;
-}
-
-
-/**
- * ecryptfs_lookup_one_lower
- * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
- * @lower_dir_dentry: lower parent directory
- * @name: lower file name
- *
- * Get the lower dentry from vfs. If lower dentry does not exist yet,
- * create it.
- */
-static struct dentry *
-ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
- struct dentry *lower_dir_dentry, struct qstr *name)
-{
- struct nameidata nd;
- struct vfsmount *lower_mnt;
- int err;
-
- lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
- ecryptfs_dentry->d_parent));
- err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
- mntput(lower_mnt);
-
- if (!err) {
- /* we dont need the mount */
- mntput(nd.path.mnt);
- return nd.path.dentry;
- }
- if (err != -ENOENT)
- return ERR_PTR(err);
-
- /* create a new lower dentry */
- return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
-}
-
-/**
* ecryptfs_lookup
* @ecryptfs_dir_inode: The eCryptfs directory inode
* @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -434,7 +366,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
size_t encrypted_and_encoded_name_size;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat = NULL;
struct dentry *lower_dir_dentry, *lower_dentry;
- struct qstr lower_name;
int rc = 0;
if ((ecryptfs_dentry->d_name.len == 1
@@ -444,20 +375,14 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
goto out_d_drop;
}
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- lower_name.name = ecryptfs_dentry->d_name.name;
- lower_name.len = ecryptfs_dentry->d_name.len;
- lower_name.hash = ecryptfs_dentry->d_name.hash;
- if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
- rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- lower_dir_dentry->d_inode, &lower_name);
- if (rc < 0)
- goto out_d_drop;
- }
- lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
- lower_dir_dentry, &lower_name);
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+ lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
+ lower_dir_dentry,
+ ecryptfs_dentry->d_name.len);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
@@ -479,28 +404,21 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out_d_drop;
}
- lower_name.name = encrypted_and_encoded_name;
- lower_name.len = encrypted_and_encoded_name_size;
- lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
- if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
- rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- lower_dir_dentry->d_inode, &lower_name);
- if (rc < 0)
- goto out_d_drop;
- }
- lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
- lower_dir_dentry, &lower_name);
+ mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+ lower_dentry = lookup_one_len(encrypted_and_encoded_name,
+ lower_dir_dentry,
+ encrypted_and_encoded_name_size);
+ mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
- ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
+ ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
goto out_d_drop;
}
lookup_and_interpose:
rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry,
- ecryptfs_dir_inode,
- ecryptfs_nd);
+ ecryptfs_dir_inode);
goto out;
out_d_drop:
d_drop(ecryptfs_dentry);
@@ -1092,6 +1010,8 @@ int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
ecryptfs_dentry_to_lower(dentry), &lower_stat);
if (!rc) {
+ fsstack_copy_attr_all(dentry->d_inode,
+ ecryptfs_inode_to_lower(dentry->d_inode));
generic_fillattr(dentry->d_inode, stat);
stat->blocks = lower_stat.blocks;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e0194b3e14d6..d9a591773919 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -99,7 +99,7 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_get);
* @ctx: [in] Pointer to eventfd context.
*
* The eventfd context reference must have been previously acquired either
- * with eventfd_ctx_get() or eventfd_ctx_fdget()).
+ * with eventfd_ctx_get() or eventfd_ctx_fdget().
*/
void eventfd_ctx_put(struct eventfd_ctx *ctx)
{
@@ -146,9 +146,9 @@ static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
* eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
* @ctx: [in] Pointer to eventfd context.
* @wait: [in] Wait queue to be removed.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
*
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
*
* -EAGAIN : The operation would have blocked.
*
@@ -175,11 +175,11 @@ EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
* eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
* @ctx: [in] Pointer to eventfd context.
* @no_wait: [in] Different from zero if the operation should not block.
- * @cnt: [out] Pointer to the 64bit conter value.
+ * @cnt: [out] Pointer to the 64-bit counter value.
*
- * Returns zero if successful, or the following error codes:
+ * Returns %0 if successful, or the following error codes:
*
- * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
+ * -EAGAIN : The operation would have blocked but @no_wait was non-zero.
* -ERESTARTSYS : A signal interrupted the wait operation.
*
* If @no_wait is zero, the function might sleep until the eventfd internal
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 267d0ada4541..4a09af9e9a63 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -63,6 +63,13 @@
* cleanup path and it is also acquired by eventpoll_release_file()
* if a file has been pushed inside an epoll set and it is then
* close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
+ * It is also acquired when inserting an epoll fd onto another epoll
+ * fd. We do this so that we walk the epoll tree and ensure that this
+ * insertion does not create a cycle of epoll file descriptors, which
+ * could lead to deadlock. We need a global mutex to prevent two
+ * simultaneous inserts (A into B and B into A) from racing and
+ * constructing a cycle without either insert observing that it is
+ * going to.
* It is possible to drop the "ep->mtx" and to use the global
* mutex "epmutex" (together with "ep->lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
@@ -224,6 +231,9 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+/* Used to check for epoll file descriptor inclusion loops */
+static struct nested_calls poll_loop_ncalls;
+
/* Used for safe wake up implementation */
static struct nested_calls poll_safewake_ncalls;
@@ -1198,6 +1208,62 @@ retry:
return res;
}
+/**
+ * ep_loop_check_proc - Callback function to be passed to the @ep_call_nested()
+ * API, to verify that adding an epoll file inside another
+ * epoll structure, does not violate the constraints, in
+ * terms of closed loops, or too deep chains (which can
+ * result in excessive stack usage).
+ *
+ * @priv: Pointer to the epoll file to be currently checked.
+ * @cookie: Original cookie for this call. This is the top-of-the-chain epoll
+ * data structure pointer.
+ * @call_nests: Current depth of the @ep_call_nested() call stack.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct eventpoll *ep = file->private_data;
+ struct rb_node *rbp;
+ struct epitem *epi;
+
+ mutex_lock(&ep->mtx);
+ for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
+ epi = rb_entry(rbp, struct epitem, rbn);
+ if (unlikely(is_file_epoll(epi->ffd.file))) {
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, epi->ffd.file,
+ epi->ffd.file->private_data, current);
+ if (error != 0)
+ break;
+ }
+ }
+ mutex_unlock(&ep->mtx);
+
+ return error;
+}
+
+/**
+ * ep_loop_check - Performs a check to verify that adding an epoll file (@file)
+ * inside another epoll file (represented by @ep) does not create
+ * closed loops or too deep chains.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @file: Pointer to the epoll file to be checked.
+ *
+ * Returns: Returns zero if adding the epoll @file inside current epoll
+ * structure @ep does not violate the constraints, or -1 otherwise.
+ */
+static int ep_loop_check(struct eventpoll *ep, struct file *file)
+{
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ ep_loop_check_proc, file, ep, current);
+}
+
/*
* Open an eventpoll file descriptor.
*/
@@ -1246,6 +1312,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
+ int did_lock_epmutex = 0;
struct file *file, *tfile;
struct eventpoll *ep;
struct epitem *epi;
@@ -1287,6 +1354,25 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
*/
ep = file->private_data;
+ /*
+ * When we insert an epoll file descriptor inside another epoll file
+ * descriptor, there is the chance of creating closed loops, which are
+ * better handled here than in more critical paths.
+ *
+ * We hold epmutex across the loop check and the insert in this case, in
+ * order to prevent two separate inserts from racing and each doing the
+ * insert "at the same time" such that ep_loop_check passes on both
+ * before either one does the insert, thereby creating a cycle.
+ */
+ if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ mutex_lock(&epmutex);
+ did_lock_epmutex = 1;
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0)
+ goto error_tgt_fput;
+ }
+
+
mutex_lock(&ep->mtx);
/*
@@ -1322,6 +1408,9 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
+ if (unlikely(did_lock_epmutex))
+ mutex_unlock(&epmutex);
+
fput(tfile);
error_fput:
fput(file);
@@ -1441,6 +1530,12 @@ static int __init eventpoll_init(void)
EP_ITEM_COST;
BUG_ON(max_user_watches < 0);
+ /*
+ * Initialize the structure used to perform epoll file descriptor
+ * inclusion loop checks.
+ */
+ ep_nested_calls_init(&poll_loop_ncalls);
+
/* Initialize the structure used to perform safe poll wait head wake ups */
ep_nested_calls_init(&poll_safewake_ncalls);
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..ba99e1abb1aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
struct file *file;
char *tmp = getname(library);
int error = PTR_ERR(tmp);
+ static const struct open_flags uselib_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
if (IS_ERR(tmp))
goto out;
- file = do_filp_open(AT_FDCWD, tmp,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_READ | MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
putname(tmp);
error = PTR_ERR(file);
if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
{
struct file *file;
int err;
+ static const struct open_flags open_exec_flags = {
+ .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
+ .acc_mode = MAY_EXEC | MAY_OPEN,
+ .intent = LOOKUP_OPEN
+ };
- file = do_filp_open(AT_FDCWD, name,
- O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
- MAY_EXEC | MAY_OPEN);
+ file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
if (IS_ERR(file))
goto out;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
err = exofs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME;
if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= EXOFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = exofs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
exofs_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
struct inode * inode = dentry->d_inode;
int len = *max_len;
int type = FILEID_INO32_GEN;
-
- if (len < 2 || (connectable && len < 4))
+
+ if (connectable && (len < 4)) {
+ *max_len = 4;
+ return 255;
+ } else if (len < 2) {
+ *max_len = 2;
return 255;
+ }
len = 2;
fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
/*
* Try to get any dentry for the given file handle from the filesystem.
*/
+ if (!nop || !nop->fh_to_dentry)
+ return ERR_PTR(-ESTALE);
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
if (!result)
result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 6346a2acf326..1b48c3370872 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
/* ialloc.c */
-extern struct inode * ext2_new_inode (struct inode *, int);
+extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
extern void ext2_free_inode (struct inode *);
extern unsigned long ext2_count_free_inodes (struct super_block *);
extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479aabff..ee9ed31948e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,8 @@ found:
return group;
}
-struct inode *ext2_new_inode(struct inode *dir, int mode)
+struct inode *ext2_new_inode(struct inode *dir, int mode,
+ const struct qstr *qstr)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
@@ -585,7 +586,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext2_init_security(inode,dir);
+ err = ext2_init_security(inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..ed5c5d496ee9 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,7 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
dquot_initialize(dir);
- inode = ext2_new_inode(dir, mode);
+ inode = ext2_new_inode(dir, mode, &dentry->d_name);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -133,7 +133,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
dquot_initialize(dir);
- inode = ext2_new_inode (dir, mode);
+ inode = ext2_new_inode (dir, mode, &dentry->d_name);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -159,7 +159,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
dquot_initialize(dir);
- inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO);
+ inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out;
@@ -230,7 +230,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
inode_inc_link_count(dir);
- inode = ext2_new_inode (dir, S_IFDIR | mode);
+ inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_dir;
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= EXT2_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ext2_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(old_inode);
ext2_delete_entry (old_de, old_page);
- inode_dec_link_count(old_inode);
if (dir_de) {
if (old_dir != new_dir)
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index a1a1c2184616..5e41cccff762 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,9 +116,11 @@ exit_ext2_xattr(void)
# endif /* CONFIG_EXT2_FS_XATTR */
#ifdef CONFIG_EXT2_FS_SECURITY
-extern int ext2_init_security(struct inode *inode, struct inode *dir);
+extern int ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
#else
-static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+static inline int ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 3004e15d5da5..5d979b4347b0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -47,14 +47,15 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext2_init_security(struct inode *inode, struct inode *dir)
+ext2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9724aef22460..bfc2dc43681d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -404,7 +404,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
* For other inodes, search forward from the parent directory's block
* group to find a free inode.
*/
-struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
+struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
+ const struct qstr *qstr, int mode)
{
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
@@ -589,7 +590,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext3_init_security(handle,inode, dir);
+ err = ext3_init_security(handle, inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..0521a007ae6d 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1710,7 +1710,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
inode->i_op = &ext3_file_inode_operations;
@@ -1746,7 +1746,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
@@ -1784,7 +1784,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2206,7 +2206,7 @@ retry:
if (IS_DIRSYNC(dir))
handle->h_sync = 1;
- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
+ inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_stop;
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..9cc19a1dea8e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext3_qctl_operations;
sb->dq_op = &ext3_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 377fe7201169..2be4f69bfa64 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -128,10 +128,10 @@ exit_ext3_xattr(void)
#ifdef CONFIG_EXT3_FS_SECURITY
extern int ext3_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir);
+ struct inode *dir, const struct qstr *qstr);
#else
static inline int ext3_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 03a99bfc59f9..b8d9f83aa5c5 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -49,14 +49,15 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index eb9097aec6f0..78b79e1bd7ed 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1042,7 +1042,7 @@ got:
if (err)
goto fail_free_drop;
- err = ext4_init_security(handle, inode, dir);
+ err = ext4_init_security(handle, inode, dir, qstr);
if (err)
goto fail_free_drop;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
dquot_initialize(dir);
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..203f9e4a70be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
#endif
+ memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
+
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
no_journal:
- EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
+ /*
+ * The maximum number of concurrent works can be high and
+ * concurrency isn't really necessary. Limit it to 1.
+ */
+ EXT4_SB(sb)->dio_unwritten_wq =
+ alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
goto failed_mount_wq;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1ef16520b950..25b7387ff183 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir);
+ struct inode *dir, const struct qstr *qstr);
#else
static inline int ext4_init_security(handle_t *handle, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 9b21268e121c..007c3bfbf094 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
}
int
-ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(inode, dir, &name, &value, &len);
+ err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
struct inode *inode = de->d_inode;
u32 ipos_h, ipos_m, ipos_l;
- if (len < 5)
+ if (len < 5) {
+ *lenp = 5;
return 255; /* no room */
+ }
ipos_h = MSDOS_I(inode)->i_pos >> 8;
ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..6c82e5bac039 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
SYSCALL_DEFINE1(dup, unsigned int, fildes)
{
int ret = -EBADF;
- struct file *file = fget(fildes);
+ struct file *file = fget_raw(fildes);
if (file) {
ret = get_unused_fd();
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
return err;
}
+static int check_fcntl_cmd(unsigned cmd)
+{
+ switch (cmd) {
+ case F_DUPFD:
+ case F_DUPFD_CLOEXEC:
+ case F_GETFD:
+ case F_SETFD:
+ case F_GETFL:
+ return 1;
+ }
+ return 0;
+}
+
SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
struct file *filp;
long err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
long err;
err = -EBADF;
- filp = fget(fd);
+ filp = fget_raw(fd);
if (!filp)
goto out;
+ if (unlikely(filp->f_mode & FMODE_PATH)) {
+ if (!check_fcntl_cmd(cmd)) {
+ fput(filp);
+ goto out;
+ }
+ }
+
err = security_file_fcntl(filp, cmd, arg);
if (err) {
fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+ BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
O_RDONLY | O_WRONLY | O_RDWR |
O_CREAT | O_EXCL | O_NOCTTY |
O_TRUNC | O_APPEND | /* O_NONBLOCK | */
__O_SYNC | O_DSYNC | FASYNC |
O_DIRECT | O_LARGEFILE | O_DIRECTORY |
O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC
+ __FMODE_EXEC | O_PATH
));
fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
+#include <linux/syscalls.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/exportfs.h>
+#include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static long do_sys_name_to_handle(struct path *path,
+ struct file_handle __user *ufh,
+ int __user *mnt_id)
+{
+ long retval;
+ struct file_handle f_handle;
+ int handle_dwords, handle_bytes;
+ struct file_handle *handle = NULL;
+
+ /*
+ * We need to make sure whether the file system
+ * supports decoding of the file handle
+ */
+ if (!path->mnt->mnt_sb->s_export_op ||
+ !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
+ return -EFAULT;
+
+ if (f_handle.handle_bytes > MAX_HANDLE_SZ)
+ return -EINVAL;
+
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ /* convert handle size to multiple of sizeof(u32) */
+ handle_dwords = f_handle.handle_bytes >> 2;
+
+ /* we ask for a non connected handle */
+ retval = exportfs_encode_fh(path->dentry,
+ (struct fid *)handle->f_handle,
+ &handle_dwords, 0);
+ handle->handle_type = retval;
+ /* convert handle size to bytes */
+ handle_bytes = handle_dwords * sizeof(u32);
+ handle->handle_bytes = handle_bytes;
+ if ((handle->handle_bytes > f_handle.handle_bytes) ||
+ (retval == 255) || (retval == -ENOSPC)) {
+ /* As per old exportfs_encode_fh documentation
+ * we could return ENOSPC to indicate overflow
+ * But file system returned 255 always. So handle
+ * both the values
+ */
+ /*
+ * set the handle size to zero so we copy only
+ * non variable part of the file_handle
+ */
+ handle_bytes = 0;
+ retval = -EOVERFLOW;
+ } else
+ retval = 0;
+ /* copy the mount id */
+ if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
+ copy_to_user(ufh, handle,
+ sizeof(struct file_handle) + handle_bytes))
+ retval = -EFAULT;
+ kfree(handle);
+ return retval;
+}
+
+/**
+ * sys_name_to_handle_at: convert name to handle
+ * @dfd: directory relative to which name is interpreted if not absolute
+ * @name: name that should be converted to handle.
+ * @handle: resulting file handle
+ * @mnt_id: mount id of the file system containing the file
+ * @flag: flag value to indicate whether to follow symlink or not
+ *
+ * @handle->handle_bytes indicates the space available to store the
+ * variable part of the file handle in bytes. If there is not
+ * enough space, the field is updated to return the minimum
+ * value required.
+ */
+SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
+ struct file_handle __user *, handle, int __user *, mnt_id,
+ int, flag)
+{
+ struct path path;
+ int lookup_flags;
+ int err;
+
+ if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
+ return -EINVAL;
+
+ lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ err = user_path_at(dfd, name, lookup_flags, &path);
+ if (!err) {
+ err = do_sys_name_to_handle(&path, handle, mnt_id);
+ path_put(&path);
+ }
+ return err;
+}
+
+static struct vfsmount *get_vfsmount_from_fd(int fd)
+{
+ struct path path;
+
+ if (fd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ spin_lock(&fs->lock);
+ path = fs->pwd;
+ mntget(path.mnt);
+ spin_unlock(&fs->lock);
+ } else {
+ int fput_needed;
+ struct file *file = fget_light(fd, &fput_needed);
+ if (!file)
+ return ERR_PTR(-EBADF);
+ path = file->f_path;
+ mntget(path.mnt);
+ fput_light(file, fput_needed);
+ }
+ return path.mnt;
+}
+
+static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
+{
+ return 1;
+}
+
+static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
+ struct path *path)
+{
+ int retval = 0;
+ int handle_dwords;
+
+ path->mnt = get_vfsmount_from_fd(mountdirfd);
+ if (IS_ERR(path->mnt)) {
+ retval = PTR_ERR(path->mnt);
+ goto out_err;
+ }
+ /* change the handle size to multiple of sizeof(u32) */
+ handle_dwords = handle->handle_bytes >> 2;
+ path->dentry = exportfs_decode_fh(path->mnt,
+ (struct fid *)handle->f_handle,
+ handle_dwords, handle->handle_type,
+ vfs_dentry_acceptable, NULL);
+ if (IS_ERR(path->dentry)) {
+ retval = PTR_ERR(path->dentry);
+ goto out_mnt;
+ }
+ return 0;
+out_mnt:
+ mntput(path->mnt);
+out_err:
+ return retval;
+}
+
+static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
+ struct path *path)
+{
+ int retval = 0;
+ struct file_handle f_handle;
+ struct file_handle *handle = NULL;
+
+ /*
+ * With handle we don't look at the execute bit on
+ * the directory. Ideally we would like CAP_DAC_SEARCH.
+ * But we don't have that
+ */
+ if (!capable(CAP_DAC_READ_SEARCH)) {
+ retval = -EPERM;
+ goto out_err;
+ }
+ if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
+ retval = -EFAULT;
+ goto out_err;
+ }
+ if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
+ (f_handle.handle_bytes == 0)) {
+ retval = -EINVAL;
+ goto out_err;
+ }
+ handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ GFP_KERNEL);
+ if (!handle) {
+ retval = -ENOMEM;
+ goto out_err;
+ }
+ /* reuse the validated header; fetch only the payload from userspace */
+ *handle = f_handle;
+ if (copy_from_user(&handle[1], &ufh[1],
+ f_handle.handle_bytes)) {
+ retval = -EFAULT;
+ goto out_handle;
+ }
+
+ retval = do_handle_to_path(mountdirfd, handle, path);
+
+out_handle:
+ kfree(handle);
+out_err:
+ return retval;
+}
+
+long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag)
+{
+ long retval = 0;
+ struct path path;
+ struct file *file;
+ int fd;
+
+ retval = handle_to_path(mountdirfd, ufh, &path);
+ if (retval)
+ return retval;
+
+ fd = get_unused_fd_flags(open_flag);
+ if (fd < 0) {
+ path_put(&path);
+ return fd;
+ }
+ file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ retval = PTR_ERR(file);
+ } else {
+ retval = fd;
+ fsnotify_open(file);
+ fd_install(fd, file);
+ }
+ path_put(&path);
+ return retval;
+}
+
+/**
+ * sys_open_by_handle_at: Open the file handle
+ * @mountdirfd: directory file descriptor
+ * @handle: file handle to be opened
+ * @flags: open flags.
+ *
+ * @mountdirfd indicates the directory file descriptor
+ * of the mount point. The file handle is decoded relative
+ * to the vfsmount pointed to by @mountdirfd. The @flags
+ * value is the same as the open(2) flags.
+ */
+SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
+ struct file_handle __user *, handle,
+ int, flags)
+{
+ long ret;
+
+ if (force_o_largefile())
+ flags |= O_LARGEFILE;
+
+ ret = do_handle_open(mountdirfd, handle, flags);
+ return ret;
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..bfab973c6c5b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -190,7 +190,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
file_take_write(file);
WARN_ON(mnt_clone_write(path->mnt));
}
- ima_counts_get(file);
+ if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_inc(path->dentry->d_inode);
return file;
}
EXPORT_SYMBOL(alloc_file);
@@ -251,6 +252,8 @@ static void __fput(struct file *file)
fops_put(file->f_op);
put_pid(file->f_owner.pid);
file_sb_list_del(file);
+ if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_dec(inode);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -276,11 +279,10 @@ struct file *fget(unsigned int fd)
rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- if (!atomic_long_inc_not_zero(&file->f_count)) {
- /* File object ref couldn't be taken */
- rcu_read_unlock();
- return NULL;
- }
+ /* File object ref couldn't be taken */
+ if (file->f_mode & FMODE_PATH ||
+ !atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
}
rcu_read_unlock();
@@ -289,6 +291,25 @@ struct file *fget(unsigned int fd)
EXPORT_SYMBOL(fget);
+struct file *fget_raw(unsigned int fd)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ /* File object ref couldn't be taken */
+ if (!atomic_long_inc_not_zero(&file->f_count))
+ file = NULL;
+ }
+ rcu_read_unlock();
+
+ return file;
+}
+
+EXPORT_SYMBOL(fget_raw);
+
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
*
@@ -313,6 +334,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
*fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = fcheck_files(files, fd);
+ if (file && (file->f_mode & FMODE_PATH))
+ file = NULL;
+ } else {
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (!(file->f_mode & FMODE_PATH) &&
+ atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ }
+
+ return file;
+}
+
+struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+{
+ struct file *file;
+ struct files_struct *files = current->files;
+
+ *fput_needed = 0;
+ if (atomic_read(&files->count) == 1) {
+ file = fcheck_files(files, fd);
} else {
rcu_read_lock();
file = fcheck_files(files, fd);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bfed8447ed80..8bd0ef9286c3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,7 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = entry->d_inode;
@@ -1283,8 +1283,11 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err)
return err;
- if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
- return 0;
+ if (attr->ia_valid & ATTR_OPEN) {
+ if (fc->atomic_o_trunc)
+ return 0;
+ file = NULL;
+ }
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 95da1bc1c826..9e0832dbb1e3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -86,18 +86,52 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
+static void fuse_release_async(struct work_struct *work)
+{
+ struct fuse_req *req;
+ struct fuse_conn *fc;
+ struct path path;
+
+ req = container_of(work, struct fuse_req, misc.release.work);
+ path = req->misc.release.path;
+ fc = get_fuse_conn(path.dentry->d_inode);
+
+ fuse_put_request(fc, req);
+ path_put(&path);
+}
+
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- path_put(&req->misc.release.path);
+ if (fc->destroy_req) {
+ /*
+ * If this is a fuseblk mount, then it's possible that
+ * releasing the path will result in releasing the
+ * super block and sending the DESTROY request. If
+ * the server is single threaded, this would hang.
+ * For this reason do the path_put() in a separate
+ * thread.
+ */
+ atomic_inc(&req->count);
+ INIT_WORK(&req->misc.release.work, fuse_release_async);
+ schedule_work(&req->misc.release.work);
+ } else {
+ path_put(&req->misc.release.path);
+ }
}
-static void fuse_file_put(struct fuse_file *ff)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- req->end = fuse_release_end;
- fuse_request_send_background(ff->fc, req);
+ if (sync) {
+ fuse_request_send(ff->fc, req);
+ path_put(&req->misc.release.path);
+ fuse_put_request(ff->fc, req);
+ } else {
+ req->end = fuse_release_end;
+ fuse_request_send_background(ff->fc, req);
+ }
kfree(ff);
}
}
@@ -219,8 +253,12 @@ void fuse_release_common(struct file *file, int opcode)
* Normally this will send the RELEASE request, however if
* some asynchronous READ or WRITE requests are outstanding,
* the sending will be delayed.
+ *
+ * Make the release synchronous if this is a fuseblk mount,
+ * synchronous RELEASE is allowed (and desirable) in this case
+ * because the server can be trusted not to screw up.
*/
- fuse_file_put(ff);
+ fuse_file_put(ff, ff->fc->destroy_req != NULL);
}
static int fuse_open(struct inode *inode, struct file *file)
@@ -558,7 +596,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
page_cache_release(page);
}
if (req->ff)
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_send_readpages(struct fuse_req *req, struct file *file)
@@ -1137,7 +1175,7 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
__free_page(req->pages[0]);
- fuse_file_put(req->ff);
+ fuse_file_put(req->ff, false);
}
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ae5744a2f9e9..d4286947bc2c 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/poll.h>
+#include <linux/workqueue.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -262,7 +263,10 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- struct fuse_release_in in;
+ union {
+ struct fuse_release_in in;
+ struct work_struct work;
+ };
struct path path;
} release;
struct fuse_init_in init_in;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
u64 nodeid;
u32 generation;
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
nodeid = get_fuse_inode(inode)->nodeid;
generation = inode->i_generation;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a9..cbc07155b1a0 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
struct posix_acl *acl;
int error;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9b..aad77e4f61b5 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
if (error == 0)
return 0;
+ unlock_page(page);
page_cache_release(page);
gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef1..ef3dc4b9fae2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
+#include "super.h"
#include "trans.h"
#include "dir.h"
#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
u64 bn, bstart;
- u32 blen;
+ u32 blen, btotal;
__be64 *p;
unsigned int rg_blocks = 0;
int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
bstart = 0;
blen = 0;
+ btotal = 0;
for (p = top; p < bottom; p++) {
if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
else {
if (bstart) {
if (metadata)
- gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_meta(ip, bstart, blen);
else
- gfs2_free_data(ip, bstart, blen);
+ __gfs2_free_data(ip, bstart, blen);
+
+ btotal += blen;
}
bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (bstart) {
if (metadata)
- gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_meta(ip, bstart, blen);
else
- gfs2_free_data(ip, bstart, blen);
+ __gfs2_free_data(ip, bstart, blen);
+
+ btotal += blen;
}
+ gfs2_statfs_change(sdp, 0, +btotal, 0);
+ gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
+ ip->i_inode.i_gid);
+
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
int error;
int had_lock = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
- if (*len < GFS2_SMALL_FH_SIZE ||
- (connectable && *len < GFS2_LARGE_FH_SIZE))
+ if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+ *len = GFS2_LARGE_FH_SIZE;
return 255;
+ } else if (*len < GFS2_SMALL_FH_SIZE) {
+ *len = GFS2_SMALL_FH_SIZE;
+ return 255;
+ }
fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb913363..4074b952b059 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
- if (!(file->f_flags & O_NOATIME)) {
+ if (!(file->f_flags & O_NOATIME) &&
+ !IS_NOATIME(&ip->i_inode)) {
struct gfs2_holder i_gh;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
error = gfs2_glock_nq(&i_gh);
- file_accessed(file);
- if (error == 0)
- gfs2_glock_dq_uninit(&i_gh);
+ if (error == 0) {
+ file_accessed(file);
+ gfs2_glock_dq(&i_gh);
+ }
+ gfs2_holder_uninit(&i_gh);
+ if (error)
+ return error;
}
vma->vm_ops = &gfs2_vm_ops;
vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
- page_zero_new_buffers(page, from, to);
- flush_dcache_page(page);
+ zero_user(page, from, to-from);
mark_page_accessed(page);
if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
block_commit_write(page, from, to);
}
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+static int needs_empty_write(sector_t block, struct inode *inode)
{
- unsigned start, end, next;
- struct buffer_head *bh, *head;
int error;
+ struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
- if (!page_has_buffers(page)) {
- error = __block_write_begin(page, from, to - from, gfs2_block_map);
- if (unlikely(error))
- return error;
+ bh_map.b_size = 1 << inode->i_blkbits;
+ error = gfs2_block_map(inode, block, &bh_map, 0);
+ if (unlikely(error))
+ return error;
+ return !buffer_mapped(&bh_map);
+}
- empty_write_end(page, from, to);
- return 0;
- }
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+ struct inode *inode = page->mapping->host;
+ unsigned start, end, next, blksize;
+ sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ int ret;
- bh = head = page_buffers(page);
+ blksize = 1 << inode->i_blkbits;
next = end = 0;
while (next < from) {
- next += bh->b_size;
- bh = bh->b_this_page;
+ next += blksize;
+ block++;
}
start = next;
do {
- next += bh->b_size;
- if (buffer_mapped(bh)) {
+ next += blksize;
+ ret = needs_empty_write(block, inode);
+ if (unlikely(ret < 0))
+ return ret;
+ if (ret == 0) {
if (end) {
- error = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(error))
- return error;
+ ret = __block_write_begin(page, start, end - start,
+ gfs2_block_map);
+ if (unlikely(ret))
+ return ret;
empty_write_end(page, start, end);
end = 0;
}
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
}
else
end = next;
- bh = bh->b_this_page;
+ block++;
} while (next < to);
if (end) {
- error = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(error))
- return error;
+ ret = __block_write_begin(page, start, end - start, gfs2_block_map);
+ if (unlikely(ret))
+ return ret;
empty_write_end(page, start, end);
}
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
mutex_lock(&fp->f_fl_mutex);
flock_lock_file_wait(file, fl);
- if (fl_gh->gh_gl)
- gfs2_glock_dq_uninit(fl_gh);
+ if (fl_gh->gh_gl) {
+ gfs2_glock_dq_wait(fl_gh);
+ gfs2_holder_uninit(fl_gh);
+ }
mutex_unlock(&fp->f_fl_mutex);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 08a8beb152e6..e2431313491f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
+#include <linux/bit_spinlock.h>
#include "gfs2.h"
#include "incore.h"
@@ -41,10 +44,6 @@
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"
-struct gfs2_gl_hash_bucket {
- struct hlist_head hb_list;
-};
-
struct gfs2_glock_iter {
int hash; /* hash bucket index */
struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
typedef void (*glock_examiner) (struct gfs2_glock * gl);
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
-static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
static struct dentry *gfs2_root;
-/*
- * Despite what you might think, the numbers below are not arbitrary :-)
- * They are taken from the ipv4 routing hash code, which is well tested
- * and thus should be nearly optimal. Later on we might tweek the numbers
- * but for now this should be fine.
- *
- * The reason for putting the locks in a separate array from the list heads
- * is that we can have fewer locks than list heads and save memory. We use
- * the same hash function for both, but with a different hash mask.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
- defined(CONFIG_PROVE_LOCKING)
-
-#ifdef CONFIG_LOCKDEP
-# define GL_HASH_LOCK_SZ 256
-#else
-# if NR_CPUS >= 32
-# define GL_HASH_LOCK_SZ 4096
-# elif NR_CPUS >= 16
-# define GL_HASH_LOCK_SZ 2048
-# elif NR_CPUS >= 8
-# define GL_HASH_LOCK_SZ 1024
-# elif NR_CPUS >= 4
-# define GL_HASH_LOCK_SZ 512
-# else
-# define GL_HASH_LOCK_SZ 256
-# endif
-#endif
-
-/* We never want more locks than chains */
-#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
-# undef GL_HASH_LOCK_SZ
-# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
-#endif
-
-static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
-
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
- return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
-}
-#else /* not SMP, so no spinlocks required */
-static inline rwlock_t *gl_lock_addr(unsigned int x)
-{
- return NULL;
-}
-#endif
-
/**
* gl_hash() - Turn glock number into hash bucket number
* @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
return h;
}
-/**
- * glock_free() - Perform a few checks and then release struct gfs2_glock
- * @gl: The glock to release
- *
- * Also calls lock module to release its internal structure for this glock.
- *
- */
+static inline void spin_lock_bucket(unsigned int hash)
+{
+ struct hlist_bl_head *bl = &gl_hash_table[hash];
+ bit_spin_lock(0, (unsigned long *)bl);
+}
-static void glock_free(struct gfs2_glock *gl)
+static inline void spin_unlock_bucket(unsigned int hash)
+{
+ struct hlist_bl_head *bl = &gl_hash_table[hash];
+ __bit_spin_unlock(0, (unsigned long *)bl);
+}
+
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
+{
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
+
+ if (gl->gl_ops->go_flags & GLOF_ASPACE)
+ kmem_cache_free(gfs2_glock_aspace_cachep, gl);
+ else
+ kmem_cache_free(gfs2_glock_cachep, gl);
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
- struct address_space *mapping = gfs2_glock2aspace(gl);
- struct kmem_cache *cachep = gfs2_glock_cachep;
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
- trace_gfs2_glock_put(gl);
- if (mapping)
- cachep = gfs2_glock_aspace_cachep;
- sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
}
/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
+ /* assert_spin_locked(&gl->gl_spin); */
+
if (gl->gl_state == LM_ST_UNLOCKED)
return 0;
- if (!list_empty(&gl->gl_holders))
+ if (test_bit(GLF_LFLUSH, &gl->gl_flags))
+ return 0;
+ if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
+ !list_empty(&gl->gl_holders))
return 0;
if (glops->go_demote_ok)
return glops->go_demote_ok(gl);
return 1;
}
+
/**
- * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
+ * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
* @gl: the glock
*
+ * If the glock is demotable, then we add it (or move it) to the end
+ * of the glock LRU list.
*/
-static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
{
- int may_reclaim;
- may_reclaim = (demote_ok(gl) &&
- (atomic_read(&gl->gl_ref) == 1 ||
- (gl->gl_name.ln_type == LM_TYPE_INODE &&
- atomic_read(&gl->gl_ref) <= 2)));
- spin_lock(&lru_lock);
- if (list_empty(&gl->gl_lru) && may_reclaim) {
+ if (demote_ok(gl)) {
+ spin_lock(&lru_lock);
+
+ if (!list_empty(&gl->gl_lru))
+ list_del_init(&gl->gl_lru);
+ else
+ atomic_inc(&lru_count);
+
list_add_tail(&gl->gl_lru, &lru_list);
- atomic_inc(&lru_count);
+ spin_unlock(&lru_lock);
}
- spin_unlock(&lru_lock);
+}
+
+void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
+{
+ spin_lock(&gl->gl_spin);
+ __gfs2_glock_schedule_for_reclaim(gl);
+ spin_unlock(&gl->gl_spin);
}
/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
{
if (atomic_dec_and_test(&gl->gl_ref))
GLOCK_BUG_ON(gl, 1);
- gfs2_glock_schedule_for_reclaim(gl);
}
/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
*
*/
-int gfs2_glock_put(struct gfs2_glock *gl)
+void gfs2_glock_put(struct gfs2_glock *gl)
{
- int rv = 0;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct address_space *mapping = gfs2_glock2aspace(gl);
- write_lock(gl_lock_addr(gl->gl_hash));
- if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
- hlist_del(&gl->gl_list);
+ if (atomic_dec_and_test(&gl->gl_ref)) {
+ spin_lock_bucket(gl->gl_hash);
+ hlist_bl_del_rcu(&gl->gl_list);
+ spin_unlock_bucket(gl->gl_hash);
+ spin_lock(&lru_lock);
if (!list_empty(&gl->gl_lru)) {
list_del_init(&gl->gl_lru);
atomic_dec(&lru_count);
}
spin_unlock(&lru_lock);
- write_unlock(gl_lock_addr(gl->gl_hash));
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- glock_free(gl);
- rv = 1;
- goto out;
+ GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+ trace_gfs2_glock_put(gl);
+ sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
- spin_lock(&gl->gl_spin);
- gfs2_glock_schedule_for_reclaim(gl);
- spin_unlock(&gl->gl_spin);
- write_unlock(gl_lock_addr(gl->gl_hash));
-out:
- return rv;
}
/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
const struct lm_lockname *name)
{
struct gfs2_glock *gl;
- struct hlist_node *h;
+ struct hlist_bl_node *h;
- hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) {
+ hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
if (!lm_name_equal(&gl->gl_name, name))
continue;
if (gl->gl_sbd != sdp)
continue;
-
- atomic_inc(&gl->gl_ref);
-
- return gl;
+ if (atomic_inc_not_zero(&gl->gl_ref))
+ return gl;
}
return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct gfs2_glock *gl, *tmp;
unsigned int hash = gl_hash(sdp, &name);
struct address_space *mapping;
+ struct kmem_cache *cachep;
- read_lock(gl_lock_addr(hash));
+ rcu_read_lock();
gl = search_bucket(hash, sdp, &name);
- read_unlock(gl_lock_addr(hash));
+ rcu_read_unlock();
*glp = gl;
if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
return -ENOENT;
if (glops->go_flags & GLOF_ASPACE)
- gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL);
+ cachep = gfs2_glock_aspace_cachep;
else
- gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL);
+ cachep = gfs2_glock_cachep;
+ gl = kmem_cache_alloc(cachep, GFP_KERNEL);
if (!gl)
return -ENOMEM;
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0;
}
- write_lock(gl_lock_addr(hash));
+ spin_lock_bucket(hash);
tmp = search_bucket(hash, sdp, &name);
if (tmp) {
- write_unlock(gl_lock_addr(hash));
- glock_free(gl);
+ spin_unlock_bucket(hash);
+ kmem_cache_free(cachep, gl);
+ atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
} else {
- hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list);
- write_unlock(gl_lock_addr(hash));
+ hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
+ spin_unlock_bucket(hash);
}
*glp = gl;
@@ -1007,13 +978,13 @@ fail:
insert_pt = &gh2->gh_list;
}
set_bit(GLF_QUEUED, &gl->gl_flags);
+ trace_gfs2_glock_queue(gh, 1);
if (likely(insert_pt == NULL)) {
list_add_tail(&gh->gh_list, &gl->gl_holders);
if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
goto do_cancel;
return;
}
- trace_gfs2_glock_queue(gh, 1);
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
+ __gfs2_glock_schedule_for_reclaim(gl);
trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
- unsigned int x;
-
- for (x = 0; x < num_gh; x++)
- gfs2_glock_dq(&ghs[x]);
+ while (num_gh--)
+ gfs2_glock_dq(&ghs[num_gh]);
}
/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
- unsigned int x;
-
- for (x = 0; x < num_gh; x++)
- gfs2_glock_dq_uninit(&ghs[x]);
+ while (num_gh--)
+ gfs2_glock_dq_uninit(&ghs[num_gh]);
}
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
* @sdp: the filesystem
* @bucket: the bucket
*
- * Returns: 1 if the bucket has entries
*/
-static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp,
+static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
unsigned int hash)
{
- struct gfs2_glock *gl, *prev = NULL;
- int has_entries = 0;
- struct hlist_head *head = &gl_hash_table[hash].hb_list;
+ struct gfs2_glock *gl;
+ struct hlist_bl_head *head = &gl_hash_table[hash];
+ struct hlist_bl_node *pos;
- read_lock(gl_lock_addr(hash));
- /* Can't use hlist_for_each_entry - don't want prefetch here */
- if (hlist_empty(head))
- goto out;
- gl = list_entry(head->first, struct gfs2_glock, gl_list);
- while(1) {
- if (!sdp || gl->gl_sbd == sdp) {
- gfs2_glock_hold(gl);
- read_unlock(gl_lock_addr(hash));
- if (prev)
- gfs2_glock_put(prev);
- prev = gl;
+ rcu_read_lock();
+ hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
+ if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
examiner(gl);
- has_entries = 1;
- read_lock(gl_lock_addr(hash));
- }
- if (gl->gl_list.next == NULL)
- break;
- gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
}
-out:
- read_unlock(gl_lock_addr(hash));
- if (prev)
- gfs2_glock_put(prev);
+ rcu_read_unlock();
cond_resched();
- return has_entries;
+}
+
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+{
+ unsigned x;
+
+ for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
+ examine_bucket(examiner, sdp, x);
}
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
- unsigned x;
+ glock_hash_walk(thaw_glock, sdp);
+}
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(thaw_glock, sdp, x);
+static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
+{
+ int ret;
+ spin_lock(&gl->gl_spin);
+ ret = __dump_glock(seq, gl);
+ spin_unlock(&gl->gl_spin);
+ return ret;
+}
+
+static void dump_glock_func(struct gfs2_glock *gl)
+{
+ dump_glock(NULL, gl);
}
/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
- unsigned int x;
-
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
- examine_bucket(clear_glock, sdp, x);
+ glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
- gfs2_dump_lockstate(sdp);
+ glock_hash_walk(dump_glock_func, sdp);
}
void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,73 +1681,22 @@ out:
return error;
}
-static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
-{
- int ret;
- spin_lock(&gl->gl_spin);
- ret = __dump_glock(seq, gl);
- spin_unlock(&gl->gl_spin);
- return ret;
-}
-/**
- * gfs2_dump_lockstate - print out the current lockstate
- * @sdp: the filesystem
- * @ub: the buffer to copy the information into
- *
- * If @ub is NULL, dump the lockstate to the console.
- *
- */
-
-static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
-{
- struct gfs2_glock *gl;
- struct hlist_node *h;
- unsigned int x;
- int error = 0;
-
- for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
-
- read_lock(gl_lock_addr(x));
-
- hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
- if (gl->gl_sbd != sdp)
- continue;
-
- error = dump_glock(NULL, gl);
- if (error)
- break;
- }
-
- read_unlock(gl_lock_addr(x));
-
- if (error)
- break;
- }
-
-
- return error;
-}
int __init gfs2_glock_init(void)
{
unsigned i;
for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
- INIT_HLIST_HEAD(&gl_hash_table[i].hb_list);
- }
-#ifdef GL_HASH_LOCK_SZ
- for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
- rwlock_init(&gl_hash_locks[i]);
+ INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
}
-#endif
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ WQ_HIGHPRI | WQ_FREEZABLE, 0);
if (IS_ERR(glock_workqueue))
return PTR_ERR(glock_workqueue);
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
- WQ_MEM_RECLAIM | WQ_FREEZEABLE,
+ WQ_MEM_RECLAIM | WQ_FREEZABLE,
0);
if (IS_ERR(gfs2_delete_workqueue)) {
destroy_workqueue(glock_workqueue);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
destroy_workqueue(gfs2_delete_workqueue);
}
+static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
+{
+ return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
+ struct gfs2_glock, gl_list);
+}
+
+static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
+{
+ return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
+ struct gfs2_glock, gl_list);
+}
+
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
struct gfs2_glock *gl;
-restart:
- read_lock(gl_lock_addr(gi->hash));
- gl = gi->gl;
- if (gl) {
- gi->gl = hlist_entry(gl->gl_list.next,
- struct gfs2_glock, gl_list);
- } else {
- gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
- struct gfs2_glock, gl_list);
- }
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
- read_unlock(gl_lock_addr(gi->hash));
- if (gl)
- gfs2_glock_put(gl);
- while (gi->gl == NULL) {
- gi->hash++;
- if (gi->hash >= GFS2_GL_HASH_SIZE)
- return 1;
- read_lock(gl_lock_addr(gi->hash));
- gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
- struct gfs2_glock, gl_list);
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
- read_unlock(gl_lock_addr(gi->hash));
- }
-
- if (gi->sdp != gi->gl->gl_sbd)
- goto restart;
+ do {
+ gl = gi->gl;
+ if (gl) {
+ gi->gl = glock_hash_next(gl);
+ } else {
+ gi->gl = glock_hash_chain(gi->hash);
+ }
+ while (gi->gl == NULL) {
+ gi->hash++;
+ if (gi->hash >= GFS2_GL_HASH_SIZE) {
+ rcu_read_unlock();
+ return 1;
+ }
+ gi->gl = glock_hash_chain(gi->hash);
+ }
+ /* Skip entries for other sb and dead entries */
+ } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
return 0;
}
-static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
-{
- if (gi->gl)
- gfs2_glock_put(gi->gl);
- gi->gl = NULL;
-}
-
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
gi->hash = 0;
+ rcu_read_lock();
do {
- if (gfs2_glock_iter_next(gi)) {
- gfs2_glock_iter_free(gi);
+ if (gfs2_glock_iter_next(gi))
return NULL;
- }
} while (n--);
return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
(*pos)++;
- if (gfs2_glock_iter_next(gi)) {
- gfs2_glock_iter_free(gi);
+ if (gfs2_glock_iter_next(gi))
return NULL;
- }
return gi->gl;
}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
{
struct gfs2_glock_iter *gi = seq->private;
- gfs2_glock_iter_free(gi);
+
+ if (gi->gl)
+ rcu_read_unlock();
+ gi->gl = NULL;
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb615..aea160690e94 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
- void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
+ void (*lm_put_lock) (struct gfs2_glock *gl);
int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags);
void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
int create, struct gfs2_glock **glp);
void gfs2_glock_hold(struct gfs2_glock *gl);
void gfs2_glock_put_nolock(struct gfs2_glock *gl);
-int gfs2_glock_put(struct gfs2_glock *gl);
+void gfs2_glock_put(struct gfs2_glock *gl);
void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
return error;
}
-/* Lock Value Block functions */
-
-int gfs2_lvb_hold(struct gfs2_glock *gl);
-void gfs2_lvb_unhold(struct gfs2_glock *gl);
-
-void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
-void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
-void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
-void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
-void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
-void gfs2_glock_thaw(struct gfs2_sbd *sdp);
-
-int __init gfs2_glock_init(void);
-void gfs2_glock_exit(void);
-
-int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
-void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
-int gfs2_register_debugfs(void);
-void gfs2_unregister_debugfs(void);
+extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
+extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
+extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
+extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
+extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
+extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
+extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
+extern void gfs2_glock_free(struct gfs2_glock *gl);
+
+extern int __init gfs2_glock_init(void);
+extern void gfs2_glock_exit(void);
+
+extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+extern int gfs2_register_debugfs(void);
+extern void gfs2_unregister_debugfs(void);
extern const struct lm_lockops gfs2_dlm_ops;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a50..3754e3cbf02b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
BUG_ON(current->journal_info);
current->journal_info = &tr;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
gfs2_remove_from_ail(bd);
+ spin_unlock(&sdp->sd_ail_lock);
+
bd->bd_bh = NULL;
bh->b_private = NULL;
bd->bd_blkno = bh->b_blocknr;
+ gfs2_log_lock(sdp);
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
gfs2_trans_add_revoke(sdp, bd);
+ gfs2_log_unlock(sdp);
+
+ spin_lock(&sdp->sd_ail_lock);
}
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_holder *gh;
+
if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
return 0;
+
+ if (!list_empty(&gl->gl_holders)) {
+ gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+ if (gh->gh_list.next != &gl->gl_holders)
+ return 0;
+ }
+
return 1;
}
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
}
/**
- * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
- * @gl: the glock
- *
- * Returns: 1 if it's ok
- */
-
-static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
-{
- const struct address_space *mapping = (const struct address_space *)(gl + 1);
- return !mapping->nrpages;
-}
-
-/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node.
* @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = rgrp_go_sync,
.go_inval = rgrp_go_inval,
- .go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c06275..870a89d6d4dc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
#include <linux/workqueue.h>
#include <linux/dlm.h>
#include <linux/buffer_head.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
};
struct gfs2_glock {
- struct hlist_node gl_list;
+ struct hlist_bl_node gl_list;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
atomic_t gl_ail_count;
struct delayed_work gl_work;
struct work_struct gl_delete;
+ struct rcu_head gl_rcu;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
QDF_USER = 0,
QDF_CHANGE = 1,
QDF_LOCKED = 2,
+ QDF_REFRESH = 3,
};
struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
+ spinlock_t sd_ail_lock;
struct list_head sd_ail1_list;
struct list_head sd_ail2_list;
u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7aa7d4f8984a..97d54a28776a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -763,14 +763,15 @@ fail:
return error;
}
-static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
+static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
+ const struct qstr *qstr)
{
int err;
size_t len;
void *value;
char *name;
- err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
+ err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
&name, &value, &len);
if (err) {
@@ -854,7 +855,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (error)
goto fail_gunlock2;
- error = gfs2_security_init(dip, GFS2_I(inode));
+ error = gfs2_security_init(dip, GFS2_I(inode), name);
if (error)
goto fail_gunlock2;
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f8..98c80d8c2a62 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
unsigned ret = gl->gl_state;
- struct gfs2_sbd *sdp = gl->gl_sbd;
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
- if (gl->gl_ops->go_flags & GLOF_ASPACE)
- kmem_cache_free(gfs2_glock_aspace_cachep, gl);
- else
- kmem_cache_free(gfs2_glock_cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
}
-static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
- kmem_cache_free(cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e10..e7ed31f858dd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
* @mapping: The associated mapping (maybe NULL)
* @bd: The gfs2_bufdata to remove
*
- * The log lock _must_ be held when calling this function
+ * The ail lock _must_ be held when calling this function
*
*/
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
*/
static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
-__releases(&sdp->sd_log_lock)
-__acquires(&sdp->sd_log_lock)
+__releases(&sdp->sd_ail_lock)
+__acquires(&sdp->sd_ail_lock)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
get_bh(bh);
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
unlock_buffer(bh);
brelse(bh);
}
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
retry = 1;
break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
struct gfs2_ail *ai;
int done = 0;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
head = &sdp->sd_ail1_list;
if (list_empty(head)) {
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return;
}
sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
if (ai->ai_sync_gen >= sync_gen)
continue;
ai->ai_sync_gen = sync_gen;
- gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
+ gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
done = 0;
break;
}
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
}
static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
struct gfs2_ail *ai, *s;
int ret;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
ret = list_empty(&sdp->sd_ail1_list);
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return ret;
}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
int wrap = (new_tail < old_tail);
int a, b, rm;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
kfree(ai);
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
}
/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
struct gfs2_ail *ai;
unsigned int tail;
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
if (list_empty(&sdp->sd_ail1_list)) {
tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
tail = ai->ai_first;
}
- gfs2_log_unlock(sdp);
+ spin_unlock(&sdp->sd_ail_lock);
return tail;
}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
sdp->sd_log_commited_databuf = 0;
sdp->sd_log_commited_revoke = 0;
+ spin_lock(&sdp->sd_ail_lock);
if (!list_empty(&ai->ai_ail1_list)) {
list_add(&ai->ai_list, &sdp->sd_ail1_list);
ai = NULL;
}
+ spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058d..e919abf25ecd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
/* If this buffer is in the AIL and it has already been written
* to in-place disk block, remove it from the AIL.
*/
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
+ spin_unlock(&sdp->sd_ail_lock);
get_bh(bh);
atomic_inc(&sdp->sd_log_pinned);
trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
- gfs2_log_lock(sdp);
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
- clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ spin_unlock(&sdp->sd_ail_lock);
+
+ if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
+ gfs2_glock_schedule_for_reclaim(bd->bd_gl);
trace_gfs2_pin(bd, 0);
- gfs2_log_unlock(sdp);
unlock_buffer(bh);
atomic_dec(&sdp->sd_log_pinned);
}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index ebef7ab6e17e..888a5f5a1a58 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist_bl.h>
#include <asm/atomic.h>
#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
{
struct gfs2_glock *gl = foo;
- INIT_HLIST_NODE(&gl->gl_list);
+ INIT_HLIST_BL_NODE(&gl->gl_list);
spin_lock_init(&gl->gl_spin);
INIT_LIST_HEAD(&gl->gl_holders);
INIT_LIST_HEAD(&gl->gl_lru);
@@ -59,14 +61,7 @@ static void gfs2_init_gl_aspace_once(void *foo)
struct address_space *mapping = (struct address_space *)(gl + 1);
gfs2_init_glock_once(gl);
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ address_space_init_once(mapping);
}
/**
@@ -144,7 +139,7 @@ static int __init init_gfs2_fs(void)
error = -ENOMEM;
gfs_recovery_wq = alloc_workqueue("gfs_recovery",
- WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0);
+ WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
if (!gfs_recovery_wq)
goto fail_wq;
@@ -198,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
unregister_filesystem(&gfs2meta_fs_type);
destroy_workqueue(gfs_recovery_wq);
+ rcu_barrier();
+
kmem_cache_destroy(gfs2_quotad_cachep);
kmem_cache_destroy(gfs2_rgrpd_cachep);
kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f9..01d97f486553 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
brelse(bh);
}
if (bd) {
+ spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail) {
gfs2_remove_from_ail(bd);
bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
bd->bd_blkno = bh->b_blocknr;
gfs2_trans_add_revoke(sdp, bd);
}
+ spin_unlock(&sdp->sd_ail_lock);
}
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f79..42ef24355afb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
init_waitqueue_head(&sdp->sd_log_waitq);
init_waitqueue_head(&sdp->sd_logd_waitq);
+ spin_lock_init(&sdp->sd_ail_lock);
INIT_LIST_HEAD(&sdp->sd_ail1_list);
INIT_LIST_HEAD(&sdp->sd_ail2_list);
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
{ Opt_err, NULL },
};
-static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- kmem_cache_free(cachep, gl);
- if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
-}
-
static const struct lm_lockops nolock_ops = {
.lm_proto_name = "lock_nolock",
- .lm_put_lock = nolock_put_lock,
+ .lm_put_lock = gfs2_glock_free,
.lm_tokens = &nolock_tokens,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20b..09e436a50723 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
/**
* gfs2_permission -
- * @inode:
- * @mask:
- * @nd: passed from Linux VFS, ignored by us
+ * @inode: The inode
+ * @mask: The mask to be tested
+ * @flags: Indicates whether this is an RCU path walk or not
*
* This may be called from the VFS directly, or from within GFS2 with the
* inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
int error;
int unlock = 0;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963de..e23d9864c418 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
goto out_end_trans;
do_qc(qd, -qd->qd_change_sync);
+ set_bit(QDF_REFRESH, &qd->qd_flags);
}
error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = ip->i_alloc;
+ struct gfs2_quota_data *qd;
unsigned int x;
int error = 0;
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
sort_qd, NULL);
for (x = 0; x < al->al_qd_num; x++) {
- error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]);
+ int force = NO_FORCE;
+ qd = al->al_qd[x];
+ if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
+ force = FORCE;
+ error = do_glock(qd, force, &al->al_qd_ghs[x]);
if (error)
break;
}
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
offset = qd2offset(qd);
alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
+ if (gfs2_is_stuffed(ip))
+ alloc_required = 1;
if (alloc_required) {
al = gfs2_alloc_get(ip);
if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
blocks += gfs2_rg_blocks(al);
}
- error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0);
+ /* Some quotas span block boundaries and can update two blocks,
+ adding an extra block to the transaction to handle such quotas */
+ error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
if (error)
goto out_release;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020c..cf930cd9664a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
*
*/
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+}
+/**
+ * gfs2_free_data - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+ __gfs2_free_data(ip, bstart, blen);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
*
*/
-void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
+ gfs2_meta_wipe(ip, bstart, blen);
+}
+/**
+ * gfs2_free_meta - free a contiguous run of data block(s)
+ * @ip: the inode these blocks are being freed from
+ * @bstart: first block of a run of contiguous blocks
+ * @blen: the length of the block run
+ *
+ */
+
+void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+
+ __gfs2_free_meta(ip, bstart, blen);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
- gfs2_meta_wipe(ip, bstart, blen);
}
void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369c..a80e3034ac47 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
+extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
}
/*
- * hfs_unlink()
+ * hfs_remove()
*
- * This is the unlink() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * file, given the inode for the parent directory and the name
- * (and its length) of the existing file.
- */
-static int hfs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct inode *inode;
- int res;
-
- inode = dentry->d_inode;
- res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
- if (res)
- return res;
-
- drop_nlink(inode);
- hfs_delete_inode(inode);
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
-
- return res;
-}
-
-/*
- * hfs_rmdir()
+ * This serves as both unlink() and rmdir() in the inode_operations
+ * structure for regular HFS directories. The purpose is to delete
+ * an existing child, given the inode for the parent directory and
+ * the name (and its length) of the existing directory.
*
- * This is the rmdir() entry in the inode_operations structure for
- * regular HFS directories. The purpose is to delete an existing
- * directory, given the inode for the parent directory and the name
- * (and its length) of the existing directory.
+ * HFS does not have hardlinks, so both rmdir and unlink set the
+ * link count to 0. The only difference is the emptiness check.
*/
-static int hfs_rmdir(struct inode *dir, struct dentry *dentry)
+static int hfs_remove(struct inode *dir, struct dentry *dentry)
{
- struct inode *inode;
+ struct inode *inode = dentry->d_inode;
int res;
- inode = dentry->d_inode;
- if (inode->i_size != 2)
+ if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
return -ENOTEMPTY;
res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* Unlink destination if it already exists */
if (new_dentry->d_inode) {
- res = hfs_unlink(new_dir, new_dentry);
+ res = hfs_remove(new_dir, new_dentry);
if (res)
return res;
}
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
const struct inode_operations hfs_dir_inode_operations = {
.create = hfs_create,
.lookup = hfs_lookup,
- .unlink = hfs_unlink,
+ .unlink = hfs_remove,
.mkdir = hfs_mkdir,
- .rmdir = hfs_rmdir,
+ .rmdir = hfs_remove,
.rename = hfs_rename,
.setattr = hfs_inode_setattr,
};
diff --git a/fs/inode.c b/fs/inode.c
index da85e56378f3..9910c039f026 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
DEFINE_SPINLOCK(inode_lock);
/*
- * iprune_sem provides exclusion between the kswapd or try_to_free_pages
- * icache shrinking path, and the umount path. Without this exclusion,
- * by the time prune_icache calls iput for the inode whose pages it has
- * been invalidating, or by the time it calls clear_inode & destroy_inode
- * from its final dispose_list, the struct super_block they refer to
- * (for inode->i_sb->s_op) may already have been freed and reused.
+ * iprune_sem provides exclusion between the icache shrinking and the
+ * umount path.
*
- * We make this an rwsem because the fastpath is icache shrinking. In
- * some cases a filesystem may be doing a significant amount of work in
- * its inode reclaim code, so this should improve parallelism.
+ * We don't actually need it to protect anything in the umount path,
+ * but only need to cycle through it to make sure any inode that
+ * prune_icache took off the LRU list has been fully torn down by the
+ * time we are past evict_inodes.
*/
static DECLARE_RWSEM(iprune_sem);
@@ -295,6 +292,20 @@ static void destroy_inode(struct inode *inode)
call_rcu(&inode->i_rcu, i_callback);
}
+void address_space_init_once(struct address_space *mapping)
+{
+ memset(mapping, 0, sizeof(*mapping));
+ INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
+ spin_lock_init(&mapping->tree_lock);
+ spin_lock_init(&mapping->i_mmap_lock);
+ INIT_LIST_HEAD(&mapping->private_list);
+ spin_lock_init(&mapping->private_lock);
+ INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+ INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+ mutex_init(&mapping->unmap_mutex);
+}
+EXPORT_SYMBOL(address_space_init_once);
+
/*
* These are initializations that only need to be done
* once, because the fields are idempotent across use
@@ -308,13 +319,7 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_devices);
INIT_LIST_HEAD(&inode->i_wb_list);
INIT_LIST_HEAD(&inode->i_lru);
- INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
- spin_lock_init(&inode->i_data.tree_lock);
- spin_lock_init(&inode->i_data.i_mmap_lock);
- INIT_LIST_HEAD(&inode->i_data.private_list);
- spin_lock_init(&inode->i_data.private_lock);
- INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
- INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+ address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
@@ -508,17 +513,12 @@ void evict_inodes(struct super_block *sb)
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (atomic_read(&inode->i_count))
continue;
-
- if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
- WARN_ON(1);
+ if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
- }
inode->i_state |= I_FREEING;
@@ -534,28 +534,40 @@ void evict_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
+
+ /*
+ * Cycle through iprune_sem to make sure any inode that prune_icache
+ * moved off the list before we took the lock has been fully torn
+ * down.
+ */
+ down_write(&iprune_sem);
up_write(&iprune_sem);
}
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
+ * @kill_dirty: flag to guide handling of dirty inodes
*
* Attempts to free all inodes for a given superblock. If there were any
* busy inodes return a non-zero value, else zero.
+ * If @kill_dirty is set, discard dirty inodes too, otherwise treat
+ * them as busy.
*/
-int invalidate_inodes(struct super_block *sb)
+int invalidate_inodes(struct super_block *sb, bool kill_dirty)
{
int busy = 0;
struct inode *inode, *next;
LIST_HEAD(dispose);
- down_write(&iprune_sem);
-
spin_lock(&inode_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
continue;
+ if (inode->i_state & I_DIRTY && !kill_dirty) {
+ busy = 1;
+ continue;
+ }
if (atomic_read(&inode->i_count)) {
busy = 1;
continue;
@@ -575,7 +587,6 @@ int invalidate_inodes(struct super_block *sb)
spin_unlock(&inode_lock);
dispose_list(&dispose);
- up_write(&iprune_sem);
return busy;
}
diff --git a/fs/internal.h b/fs/internal.h
index 0663568b1247..f3d15de44b15 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -106,10 +106,23 @@ extern void put_super(struct super_block *sb);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct open_flags {
+ int open_flag;
+ int mode;
+ int acc_mode;
+ int intent;
+};
+extern struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int lookup_flags);
+extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
+ const char *, const struct open_flags *, int lookup_flags);
+
+extern long do_handle_open(int mountdirfd,
+ struct file_handle __user *ufh, int open_flag);
/*
* inode.c
*/
extern int get_nr_dirty_inodes(void);
extern void evict_inodes(struct super_block *);
-extern int invalidate_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *, bool);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
* offset of the inode and the upper 16 bits of fh32[1] to
* hold the offset of the parent.
*/
-
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *max_len = 5;
+ return 255;
+ } else if (len < 3) {
+ *max_len = 3;
return 255;
+ }
len = 3;
fh32[0] = ei->i_iget5_block;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 92978658ed18..82faddd1f321 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -215,8 +215,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
no chance of AB-BA deadlock involving its f->sem). */
mutex_unlock(&f->sem);
- ret = jffs2_do_create(c, dir_f, f, ri,
- dentry->d_name.name, dentry->d_name.len);
+ ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
if (ret)
goto fail;
@@ -386,7 +385,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
@@ -530,7 +529,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
@@ -703,7 +702,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
- ret = jffs2_init_security(inode, dir_i);
+ ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
if (ret)
goto fail;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 5a53d9bdb2b5..e4619b00f7c5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -401,7 +401,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
struct jffs2_raw_inode *ri, unsigned char *buf,
uint32_t offset, uint32_t writelen, uint32_t *retlen);
int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
- struct jffs2_raw_inode *ri, const char *name, int namelen);
+ struct jffs2_raw_inode *ri, const struct qstr *qstr);
int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 239f51216a68..cfeb7164b085 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,14 +23,15 @@
#include "nodelist.h"
/* ---- Initial Security Label Attachment -------------- */
-int jffs2_init_security(struct inode *inode, struct inode *dir)
+int jffs2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int rc;
size_t len;
void *value;
char *name;
- rc = security_inode_init_security(inode, dir, &name, &value, &len);
+ rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
if (rc) {
if (rc == -EOPNOTSUPP)
return 0;
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index c819eb0e982d..30d175b6d290 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -424,7 +424,9 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
return ret;
}
-int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen)
+int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
+ struct jffs2_inode_info *f, struct jffs2_raw_inode *ri,
+ const struct qstr *qstr)
{
struct jffs2_raw_dirent *rd;
struct jffs2_full_dnode *fn;
@@ -466,15 +468,15 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode);
+ ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode, qstr);
if (ret)
return ret;
ret = jffs2_init_acl_post(&f->vfs_inode);
if (ret)
return ret;
- ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
- ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
+ ret = jffs2_reserve_space(c, sizeof(*rd)+qstr->len, &alloclen,
+ ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(qstr->len));
if (ret) {
/* Eep. */
@@ -493,19 +495,19 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
- rd->totlen = cpu_to_je32(sizeof(*rd) + namelen);
+ rd->totlen = cpu_to_je32(sizeof(*rd) + qstr->len);
rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4));
rd->pino = cpu_to_je32(dir_f->inocache->ino);
rd->version = cpu_to_je32(++dir_f->highest_version);
rd->ino = ri->ino;
rd->mctime = ri->ctime;
- rd->nsize = namelen;
+ rd->nsize = qstr->len;
rd->type = DT_REG;
rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
- rd->name_crc = cpu_to_je32(crc32(0, name, namelen));
+ rd->name_crc = cpu_to_je32(crc32(0, qstr->name, qstr->len));
- fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL);
+ fd = jffs2_write_dirent(c, dir_f, rd, qstr->name, qstr->len, ALLOC_NORMAL);
jffs2_free_raw_dirent(rd);
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index cf4f5759b42b..7be4beb306f3 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -121,10 +121,11 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#endif /* CONFIG_JFFS2_FS_XATTR */
#ifdef CONFIG_JFFS2_FS_SECURITY
-extern int jffs2_init_security(struct inode *inode, struct inode *dir);
+extern int jffs2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
extern const struct xattr_handler jffs2_security_xattr_handler;
#else
-#define jffs2_init_security(inode,dir) (0)
+#define jffs2_init_security(inode,dir,qstr) (0)
#endif /* CONFIG_JFFS2_FS_SECURITY */
#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 88b6cc535bf2..e9e100fd7c09 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -62,10 +62,11 @@ extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
extern int jfs_removexattr(struct dentry *, const char *);
#ifdef CONFIG_JFS_SECURITY
-extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+extern int jfs_init_security(tid_t, struct inode *, struct inode *,
+ const struct qstr *);
#else
static inline int jfs_init_security(tid_t tid, struct inode *inode,
- struct inode *dir)
+ struct inode *dir, const struct qstr *qstr)
{
return 0;
}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..eaaf2b511e89 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -115,7 +115,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -253,7 +253,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
if (ip->i_nlink == JFS_LINK_MAX)
return -EMLINK;
- if (ip->i_nlink == 0)
- return -ENOENT;
-
dquot_initialize(dir);
tid = txBegin(ip->i_sb, 0);
@@ -932,7 +929,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
- rc = jfs_init_security(tid, ip, dip);
+ rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
if (rc)
goto out3;
@@ -1395,7 +1392,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
if (rc)
goto out3;
- rc = jfs_init_security(tid, ip, dir);
+ rc = jfs_init_security(tid, ip, dir, &dentry->d_name);
if (rc) {
txAbort(tid, 0);
goto out3;
@@ -1600,7 +1597,7 @@ out:
static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
/*
* This is not negative dentry. Always valid.
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 2d7f165d0f1d..3fa4c32272df 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -1091,7 +1091,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
}
#ifdef CONFIG_JFS_SECURITY
-int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
{
int rc;
size_t len;
@@ -1099,7 +1100,8 @@ int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
char *suffix;
char *name;
- rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ rc = security_inode_init_security(inode, dir, qstr, &suffix, &value,
+ &len);
if (rc) {
if (rc == -EOPNOTSUPP)
return 0;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
new_de = minix_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
minix_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= info->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = minix_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
minix_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 9e701e28a329..b912b7abe747 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
return retval;
}
-char * getname(const char __user * filename)
+static char *getname_flags(const char __user * filename, int flags)
{
char *tmp, *result;
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
result = tmp;
if (retval < 0) {
- __putname(tmp);
- result = ERR_PTR(retval);
+ if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
+ __putname(tmp);
+ result = ERR_PTR(retval);
+ }
}
}
audit_getname(result);
return result;
}
+char *getname(const char __user * filename)
+{
+ return getname_flags(filename, 0);
+}
+
#ifdef CONFIG_AUDITSYSCALL
void putname(const char *name)
{
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
{
struct fs_struct *fs = current->fs;
struct dentry *dentry = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
goto err;
BUG_ON(nd->inode != dentry->d_inode);
spin_unlock(&dentry->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
err:
spin_unlock(&dentry->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
+ int want_root = 0;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (nd->root.mnt) {
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
parent->d_count++;
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
- if (nd->root.mnt) {
+ if (want_root) {
path_get(&nd->root);
spin_unlock(&fs->lock);
}
@@ -490,7 +501,7 @@ err:
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
err_root:
- if (nd->root.mnt)
+ if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
}
@@ -498,8 +509,16 @@ err_root:
/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
{
- if (nd->flags & LOOKUP_RCU)
- return nameidata_dentry_drop_rcu(nd, dentry);
+ if (nd->flags & LOOKUP_RCU) {
+ if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+ }
+ }
return 0;
}
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
spin_lock(&dentry->d_lock);
if (!__d_rcu_to_refcount(dentry, nd->seq))
goto err_unlock;
@@ -539,14 +559,6 @@ err_unlock:
return -ECHILD;
}
-/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
-static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
-{
- if (likely(nd->flags & LOOKUP_RCU))
- return nameidata_drop_rcu_last(nd);
- return 0;
-}
-
/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
return dentry;
}
-static inline struct dentry *
-do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
-{
- int status = d_revalidate(dentry, nd);
- if (likely(status > 0))
- return dentry;
- if (status == -ECHILD) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- return do_revalidate(dentry, nd);
- }
- if (status < 0)
- return ERR_PTR(status);
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return ERR_PTR(-ECHILD);
- if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
- return dentry;
-}
-
-static inline int need_reval_dot(struct dentry *dentry)
-{
- if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
- return 0;
-
- if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
- return 0;
-
- return 1;
-}
-
/*
- * force_reval_path - force revalidation of a dentry
+ * handle_reval_path - force revalidation of a dentry
*
* In some situations the path walking code will trust dentries without
* revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry)
* invalidate the dentry. It's up to the caller to handle putting references
* to the path if necessary.
*/
-static int
-force_reval_path(struct path *path, struct nameidata *nd)
+static inline int handle_reval_path(struct nameidata *nd)
{
+ struct dentry *dentry = nd->path.dentry;
int status;
- struct dentry *dentry = path->dentry;
- /*
- * only check on filesystems where it's possible for the dentry to
- * become stale.
- */
- if (!need_reval_dot(dentry))
+ if (likely(!(nd->flags & LOOKUP_JUMPED)))
+ return 0;
+
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
return 0;
+ /* Note: we do not d_invalidate() */
status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
- if (!status) {
- d_invalidate(dentry);
+ if (!status)
status = -ESTALE;
- }
+
return status;
}
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
path_put(&nd->path);
nd->path = nd->root;
path_get(&nd->root);
+ nd->flags |= LOOKUP_JUMPED;
}
nd->inode = nd->path.dentry->d_inode;
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path,
nd->path.dentry = path->dentry;
}
+static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
+{
+ struct inode *inode = link->dentry->d_inode;
+ if (!IS_ERR(cookie) && inode->i_op->put_link)
+ inode->i_op->put_link(link->dentry, nd, cookie);
+ path_put(link);
+}
+
static __always_inline int
-__do_follow_link(const struct path *link, struct nameidata *nd, void **p)
+follow_link(struct path *link, struct nameidata *nd, void **p)
{
int error;
struct dentry *dentry = link->dentry;
BUG_ON(nd->flags & LOOKUP_RCU);
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+
+ if (unlikely(current->total_link_count >= 40)) {
+ *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+ cond_resched();
+ current->total_link_count++;
+
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
- if (link->mnt == nd->path.mnt)
- mntget(link->mnt);
+ error = security_inode_follow_link(link->dentry, nd);
+ if (error) {
+ *p = ERR_PTR(error); /* no ->put_link(), please */
+ path_put(&nd->path);
+ return error;
+ }
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,55 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
if (s)
error = __vfs_follow_link(nd, s);
else if (nd->last_type == LAST_BIND) {
- error = force_reval_path(&nd->path, nd);
- if (error)
+ nd->flags |= LOOKUP_JUMPED;
+ nd->inode = nd->path.dentry->d_inode;
+ if (nd->inode->i_op->follow_link) {
+ /* stepped on a _really_ weird one */
path_put(&nd->path);
+ error = -ELOOP;
+ }
}
}
return error;
}
-/*
- * This limits recursive symlink follows to 8, while
- * limiting consecutive symlinks to 40.
- *
- * Without that kind of total limit, nasty chains of consecutive
- * symlinks can cause almost arbitrarily long lookups.
- */
-static inline int do_follow_link(struct path *path, struct nameidata *nd)
-{
- void *cookie;
- int err = -ELOOP;
-
- /* We drop rcu-walk here */
- if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
- return -ECHILD;
-
- if (current->link_count >= MAX_NESTED_LINKS)
- goto loop;
- if (current->total_link_count >= 40)
- goto loop;
- BUG_ON(nd->depth >= MAX_NESTED_LINKS);
- cond_resched();
- err = security_inode_follow_link(path->dentry, nd);
- if (err)
- goto loop;
- current->link_count++;
- current->total_link_count++;
- nd->depth++;
- err = __do_follow_link(path, nd, &cookie);
- if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
- path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
- path_put(path);
- current->link_count--;
- nd->depth--;
- return err;
-loop:
- path_put_conditional(path, nd);
- path_put(&nd->path);
- return err;
-}
-
static int follow_up_rcu(struct path *path)
{
struct vfsmount *parent;
@@ -1067,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
seq = read_seqcount_begin(&parent->d_seq);
if (read_seqcount_retry(&old->d_seq, nd->seq))
- return -ECHILD;
+ goto failed;
inode = parent->d_inode;
nd->path.dentry = parent;
nd->seq = seq;
@@ -1080,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
}
__follow_mount_rcu(nd, &nd->path, &inode, true);
nd->inode = inode;
-
return 0;
+
+failed:
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
}
/*
@@ -1215,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
- struct inode *dir;
+ int need_reval = 1;
+ int status = 1;
int err;
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
- err = parent->d_op->d_hash(parent, nd->inode, name);
- if (err < 0)
- return err;
- }
-
- /*
* Rename seqlock is not required here because in the off chance
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
-
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
- if (!dentry) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto need_lookup;
- }
+ if (!dentry)
+ goto unlazy;
+
/* Memory barrier in read_seqcount_begin of child is enough */
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
-
nd->seq = seq;
+
if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate_rcu(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
- if (!(nd->flags & LOOKUP_RCU))
- goto done;
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status != -ECHILD)
+ need_reval = 0;
+ goto unlazy;
+ }
}
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, false)))
return 0;
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- /* fallthru */
+unlazy:
+ if (dentry) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return -ECHILD;
+ } else {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ } else {
+ dentry = __d_lookup(parent, name);
}
- dentry = __d_lookup(parent, name);
- if (!dentry)
- goto need_lookup;
-found:
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
+
+retry:
+ if (unlikely(!dentry)) {
+ struct inode *dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
+
+ mutex_lock(&dir->i_mutex);
+ dentry = d_lookup(parent, name);
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
+ if (IS_ERR(dentry)) {
+ mutex_unlock(&dir->i_mutex);
+ return PTR_ERR(dentry);
+ }
+ /* known good */
+ need_reval = 0;
+ status = 1;
+ }
+ mutex_unlock(&dir->i_mutex);
}
-done:
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
+ status = d_revalidate(dentry, nd);
+ if (unlikely(status <= 0)) {
+ if (status < 0) {
+ dput(dentry);
+ return status;
+ }
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ need_reval = 1;
+ goto retry;
+ }
+ }
+
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd->flags);
@@ -1286,39 +1276,113 @@ done:
}
*inode = path->dentry->d_inode;
return 0;
+}
+
+static inline int may_lookup(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err != -ECHILD)
+ return err;
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ }
+ return exec_permission(nd->inode, 0);
+}
-need_lookup:
- dir = parent->d_inode;
- BUG_ON(nd->inode != dir);
+static inline int handle_dots(struct nameidata *nd, int type)
+{
+ if (type == LAST_DOTDOT) {
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
+ }
+ return 0;
+}
- mutex_lock(&dir->i_mutex);
- /*
- * First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore, or the first
- * lookup failed due to an unrelated rename.
- *
- * This could use version numbering or similar to avoid unnecessary
- * cache lookups, but then we'd have to do the first lookup in the
- * non-racy way. However in the common case here, everything should
- * be hot in cache, so would it be a big win?
- */
- dentry = d_lookup(parent, name);
- if (likely(!dentry)) {
- dentry = d_alloc_and_lookup(parent, name, nd);
- mutex_unlock(&dir->i_mutex);
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+static void terminate_walk(struct nameidata *nd)
+{
+ if (!(nd->flags & LOOKUP_RCU)) {
+ path_put(&nd->path);
+ } else {
+ nd->flags &= ~LOOKUP_RCU;
+ if (!(nd->flags & LOOKUP_ROOT))
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
}
+}
+
+static inline int walk_component(struct nameidata *nd, struct path *path,
+ struct qstr *name, int type, int follow)
+{
+ struct inode *inode;
+ int err;
/*
- * Uhhuh! Nasty case: the cache was re-populated while
- * we waited on the semaphore. Need to revalidate.
+ * "." and ".." are special - ".." especially so because it has
+ * to be able to know about the current root directory and
+ * parent relationships.
*/
- mutex_unlock(&dir->i_mutex);
- goto found;
+ if (unlikely(type != LAST_NORM))
+ return handle_dots(nd, type);
+ err = do_lookup(nd, name, path, &inode);
+ if (unlikely(err)) {
+ terminate_walk(nd);
+ return err;
+ }
+ if (!inode) {
+ path_to_nameidata(path, nd);
+ terminate_walk(nd);
+ return -ENOENT;
+ }
+ if (unlikely(inode->i_op->follow_link) && follow) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+ return -ECHILD;
+ BUG_ON(inode != path->dentry->d_inode);
+ return 1;
+ }
+ path_to_nameidata(path, nd);
+ nd->inode = inode;
+ return 0;
+}
-fail:
- return PTR_ERR(dentry);
+/*
+ * This limits recursive symlink follows to 8, while
+ * limiting consecutive symlinks to 40.
+ *
+ * Without that kind of total limit, nasty chains of consecutive
+ * symlinks can cause almost arbitrarily long lookups.
+ */
+static inline int nested_symlink(struct path *path, struct nameidata *nd)
+{
+ int res;
+
+ BUG_ON(nd->depth >= MAX_NESTED_LINKS);
+ if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
+ path_put_conditional(path, nd);
+ path_put(&nd->path);
+ return -ELOOP;
+ }
+
+ nd->depth++;
+ current->link_count++;
+
+ do {
+ struct path link = *path;
+ void *cookie;
+
+ res = follow_link(&link, nd, &cookie);
+ if (!res)
+ res = walk_component(nd, path, &nd->last,
+ nd->last_type, LOOKUP_FOLLOW);
+ put_link(nd, &link, cookie);
+ } while (res > 0);
+
+ current->link_count--;
+ nd->depth--;
+ return res;
}
/*
@@ -1338,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
while (*name=='/')
name++;
if (!*name)
- goto return_reval;
-
- if (nd->depth)
- lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
+ return 0;
/* At this point we know we have a real path component. */
for(;;) {
- struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
+ int type;
nd->flags |= LOOKUP_CONTINUE;
- if (nd->flags & LOOKUP_RCU) {
- err = exec_permission(nd->inode, IPERM_FLAG_RCU);
- if (err == -ECHILD) {
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
- goto exec_again;
- }
- } else {
-exec_again:
- err = exec_permission(nd->inode, 0);
- }
+
+ err = may_lookup(nd);
if (err)
break;
@@ -1377,53 +1429,43 @@ exec_again:
this.len = name - (const char *) this.name;
this.hash = end_name_hash(hash);
+ type = LAST_NORM;
+ if (this.name[0] == '.') switch (this.len) {
+ case 2:
+ if (this.name[1] == '.') {
+ type = LAST_DOTDOT;
+ nd->flags |= LOOKUP_JUMPED;
+ }
+ break;
+ case 1:
+ type = LAST_DOT;
+ }
+ if (likely(type == LAST_NORM)) {
+ struct dentry *parent = nd->path.dentry;
+ nd->flags &= ~LOOKUP_JUMPED;
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ err = parent->d_op->d_hash(parent, nd->inode,
+ &this);
+ if (err < 0)
+ break;
+ }
+ }
+
/* remove trailing slashes? */
if (!c)
goto last_component;
while (*++name == '/');
if (!*name)
- goto last_with_slashes;
+ goto last_component;
- /*
- * "." and ".." are special - ".." especially so because it has
- * to be able to know about the current root directory and
- * parent relationships.
- */
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- continue;
- }
- /* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- err = -ENOENT;
- if (!inode)
- goto out_dput;
+ err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
+ if (err < 0)
+ return err;
- if (inode->i_op->follow_link) {
- BUG_ON(inode != next.dentry->d_inode);
- err = do_follow_link(&next, nd);
+ if (err) {
+ err = nested_symlink(&next, nd);
if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- err = -ENOENT;
- if (!nd->inode)
- break;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
+ return err;
}
err = -ENOTDIR;
if (!nd->inode->i_op->lookup)
@@ -1431,210 +1473,109 @@ exec_again:
continue;
/* here ends the main loop */
-last_with_slashes:
- lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
last_component:
/* Clear LOOKUP_CONTINUE iff it was previously unset */
nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
- if (lookup_flags & LOOKUP_PARENT)
- goto lookup_parent;
- if (this.name[0] == '.') switch (this.len) {
- default:
- break;
- case 2:
- if (this.name[1] != '.')
- break;
- if (nd->flags & LOOKUP_RCU) {
- if (follow_dotdot_rcu(nd))
- return -ECHILD;
- } else
- follow_dotdot(nd);
- /* fallthrough */
- case 1:
- goto return_reval;
- }
- err = do_lookup(nd, &this, &next, &inode);
- if (err)
- break;
- if (inode && unlikely(inode->i_op->follow_link) &&
- (lookup_flags & LOOKUP_FOLLOW)) {
- BUG_ON(inode != next.dentry->d_inode);
- err = do_follow_link(&next, nd);
- if (err)
- goto return_err;
- nd->inode = nd->path.dentry->d_inode;
- } else {
- path_to_nameidata(&next, nd);
- nd->inode = inode;
- }
- err = -ENOENT;
- if (!nd->inode)
- break;
- if (lookup_flags & LOOKUP_DIRECTORY) {
- err = -ENOTDIR;
- if (!nd->inode->i_op->lookup)
- break;
- }
- goto return_base;
-lookup_parent:
nd->last = this;
- nd->last_type = LAST_NORM;
- if (this.name[0] != '.')
- goto return_base;
- if (this.len == 1)
- nd->last_type = LAST_DOT;
- else if (this.len == 2 && this.name[1] == '.')
- nd->last_type = LAST_DOTDOT;
- else
- goto return_base;
-return_reval:
- /*
- * We bypassed the ordinary revalidation routines.
- * We may need to check the cached dentry for staleness.
- */
- if (need_reval_dot(nd->path.dentry)) {
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
- /* Note: we do not d_invalidate() */
- err = d_revalidate(nd->path.dentry, nd);
- if (!err)
- err = -ESTALE;
- if (err < 0)
- break;
- return 0;
- }
-return_base:
- if (nameidata_drop_rcu_last_maybe(nd))
- return -ECHILD;
+ nd->last_type = type;
return 0;
-out_dput:
- if (!(nd->flags & LOOKUP_RCU))
- path_put_conditional(&next, nd);
- break;
}
- if (!(nd->flags & LOOKUP_RCU))
- path_put(&nd->path);
-return_err:
+ terminate_walk(nd);
return err;
}
-static inline int path_walk_rcu(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static inline int path_walk_simple(const char *name, struct nameidata *nd)
-{
- current->total_link_count = 0;
-
- return link_path_walk(name, nd);
-}
-
-static int path_walk(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- int result;
-
- current->total_link_count = 0;
-
- /* make sure the stuff we saved doesn't go away */
- path_get(&save);
-
- result = link_path_walk(name, nd);
- if (result == -ESTALE) {
- /* nd->path had been dropped */
- current->total_link_count = 0;
- nd->path = save;
- path_get(&nd->path);
- nd->flags |= LOOKUP_REVAL;
- result = link_path_walk(name, nd);
- }
-
- path_put(&save);
-
- return result;
-}
-
-static void path_finish_rcu(struct nameidata *nd)
-{
- if (nd->flags & LOOKUP_RCU) {
- /* RCU dangling. Cancel it. */
- nd->flags &= ~LOOKUP_RCU;
- nd->root.mnt = NULL;
- rcu_read_unlock();
- br_read_unlock(vfsmount_lock);
- }
- if (nd->file)
- fput(nd->file);
-}
-
-static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags,
+ struct nameidata *nd, struct file **fp)
{
int retval = 0;
int fput_needed;
struct file *file;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags | LOOKUP_RCU;
+ nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
+ if (flags & LOOKUP_ROOT) {
+ struct inode *inode = nd->root.dentry->d_inode;
+ if (*name) {
+ if (!inode->i_op->lookup)
+ return -ENOTDIR;
+ retval = inode_permission(inode, MAY_EXEC);
+ if (retval)
+ return retval;
+ }
+ nd->path = nd->root;
+ nd->inode = inode;
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } else {
+ path_get(&nd->path);
+ }
+ return 0;
+ }
+
nd->root.mnt = NULL;
- nd->file = NULL;
if (*name=='/') {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
-
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->root = fs->root;
- nd->path = nd->root;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
-
+ if (flags & LOOKUP_RCU) {
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ set_root_rcu(nd);
+ } else {
+ set_root(nd);
+ path_get(&nd->root);
+ }
+ nd->path = nd->root;
} else if (dfd == AT_FDCWD) {
- struct fs_struct *fs = current->fs;
- unsigned seq;
-
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
- do {
- seq = read_seqcount_begin(&fs->seq);
- nd->path = fs->pwd;
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- } while (read_seqcount_retry(&fs->seq, seq));
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+ } else {
+ get_fs_pwd(current->fs, &nd->path);
+ }
} else {
struct dentry *dentry;
- file = fget_light(dfd, &fput_needed);
+ file = fget_raw_light(dfd, &fput_needed);
retval = -EBADF;
if (!file)
goto out_fail;
dentry = file->f_path.dentry;
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (*name) {
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+ }
nd->path = file->f_path;
- if (fput_needed)
- nd->file = file;
-
- nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
- br_read_lock(vfsmount_lock);
- rcu_read_lock();
+ if (flags & LOOKUP_RCU) {
+ if (fput_needed)
+ *fp = file;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ } else {
+ path_get(&file->f_path);
+ fput_light(file, fput_needed);
+ }
}
+
nd->inode = nd->path.dentry->d_inode;
return 0;
@@ -1644,60 +1585,23 @@ out_fail:
return retval;
}
-static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+static inline int lookup_last(struct nameidata *nd, struct path *path)
{
- int retval = 0;
- int fput_needed;
- struct file *file;
-
- nd->last_type = LAST_ROOT; /* if there are only slashes... */
- nd->flags = flags;
- nd->depth = 0;
- nd->root.mnt = NULL;
-
- if (*name=='/') {
- set_root(nd);
- nd->path = nd->root;
- path_get(&nd->root);
- } else if (dfd == AT_FDCWD) {
- get_fs_pwd(current->fs, &nd->path);
- } else {
- struct dentry *dentry;
-
- file = fget_light(dfd, &fput_needed);
- retval = -EBADF;
- if (!file)
- goto out_fail;
-
- dentry = file->f_path.dentry;
-
- retval = -ENOTDIR;
- if (!S_ISDIR(dentry->d_inode->i_mode))
- goto fput_fail;
+ if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- retval = file_permission(file, MAY_EXEC);
- if (retval)
- goto fput_fail;
-
- nd->path = file->f_path;
- path_get(&file->f_path);
-
- fput_light(file, fput_needed);
- }
- nd->inode = nd->path.dentry->d_inode;
- return 0;
-
-fput_fail:
- fput_light(file, fput_needed);
-out_fail:
- return retval;
+ nd->flags &= ~LOOKUP_PARENT;
+ return walk_component(nd, path, &nd->last, nd->last_type,
+ nd->flags & LOOKUP_FOLLOW);
}
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
+static int path_lookupat(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval;
+ struct file *base = NULL;
+ struct path path;
+ int err;
/*
* Path walking is largely split up into 2 different synchronisation
@@ -1713,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name,
* be handled by restarting a traditional ref-walk (which will always
* be able to complete).
*/
- retval = path_init_rcu(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk_rcu(name, nd);
- path_finish_rcu(nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
+
+ if (unlikely(err))
+ return err;
+
+ current->total_link_count = 0;
+ err = link_path_walk(name, nd);
+
+ if (!err && !(flags & LOOKUP_PARENT)) {
+ err = lookup_last(nd, &path);
+ while (err > 0) {
+ void *cookie;
+ struct path link = path;
+ nd->flags |= LOOKUP_PARENT;
+ err = follow_link(&link, nd, &cookie);
+ if (!err)
+ err = lookup_last(nd, &path);
+ put_link(nd, &link, cookie);
+ }
}
- if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
- /* slower, locked walk */
- if (retval == -ESTALE)
- flags |= LOOKUP_REVAL;
- retval = path_init(dfd, name, flags, nd);
- if (unlikely(retval))
- return retval;
- retval = path_walk(name, nd);
- if (nd->root.mnt) {
- path_put(&nd->root);
- nd->root.mnt = NULL;
+ if (nd->flags & LOOKUP_RCU) {
+ /* went all the way through without dropping RCU */
+ BUG_ON(err);
+ if (nameidata_drop_rcu_last(nd))
+ err = -ECHILD;
+ }
+
+ if (!err)
+ err = handle_reval_path(nd);
+
+ if (!err && nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup) {
+ path_put(&nd->path);
+ return -ENOTDIR;
}
}
+ if (base)
+ fput(base);
+
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ return err;
+}
+
+static int do_path_lookup(int dfd, const char *name,
+ unsigned int flags, struct nameidata *nd)
+{
+ int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
+ if (unlikely(retval == -ECHILD))
+ retval = path_lookupat(dfd, name, flags, nd);
+ if (unlikely(retval == -ESTALE))
+ retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
+
if (likely(!retval)) {
if (unlikely(!audit_dummy_context())) {
if (nd->path.dentry && nd->inode)
audit_inode(name, nd->path.dentry);
}
}
-
return retval;
}
-int path_lookup(const char *name, unsigned int flags,
- struct nameidata *nd)
+int kern_path_parent(const char *name, struct nameidata *nd)
{
- return do_path_lookup(AT_FDCWD, name, flags, nd);
+ return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
}
int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1774,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
const char *name, unsigned int flags,
struct nameidata *nd)
{
- int retval;
-
- /* same as do_path_lookup */
- nd->last_type = LAST_ROOT;
- nd->flags = flags;
- nd->depth = 0;
-
- nd->path.dentry = dentry;
- nd->path.mnt = mnt;
- path_get(&nd->path);
- nd->root = nd->path;
- path_get(&nd->root);
- nd->inode = nd->path.dentry->d_inode;
-
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->inode))
- audit_inode(name, nd->path.dentry);
-
- path_put(&nd->root);
- nd->root.mnt = NULL;
-
- return retval;
+ nd->root.dentry = dentry;
+ nd->root.mnt = mnt;
+ /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
+ return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
}
static struct dentry *__lookup_hash(struct qstr *name,
@@ -1811,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
return ERR_PTR(err);
/*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (base->d_flags & DCACHE_OP_HASH) {
- err = base->d_op->d_hash(base, inode, name);
- dentry = ERR_PTR(err);
- if (err < 0)
- goto out;
- }
-
- /*
* Don't bother with __d_lookup: callers are for creat as
* well as unlink, so a lot of the time it would cost
* a double lookup.
@@ -1833,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
if (!dentry)
dentry = d_alloc_and_lookup(base, name, nd);
-out:
+
return dentry;
}
@@ -1847,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
return __lookup_hash(&nd->last, nd->path.dentry, nd);
}
-static int __lookup_one_len(const char *name, struct qstr *this,
- struct dentry *base, int len)
-{
- unsigned long hash;
- unsigned int c;
-
- this->name = name;
- this->len = len;
- if (!len)
- return -EACCES;
-
- hash = init_name_hash();
- while (len--) {
- c = *(const unsigned char *)name++;
- if (c == '/' || c == '\0')
- return -EACCES;
- hash = partial_name_hash(c, hash);
- }
- this->hash = end_name_hash(hash);
- return 0;
-}
-
/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
@@ -1882,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
*/
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
- int err;
struct qstr this;
+ unsigned long hash;
+ unsigned int c;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
- err = __lookup_one_len(name, &this, base, len);
- if (err)
- return ERR_PTR(err);
+ this.name = name;
+ this.len = len;
+ if (!len)
+ return ERR_PTR(-EACCES);
+
+ hash = init_name_hash();
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ return ERR_PTR(-EACCES);
+ hash = partial_name_hash(c, hash);
+ }
+ this.hash = end_name_hash(hash);
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (base->d_flags & DCACHE_OP_HASH) {
+ int err = base->d_op->d_hash(base, base->d_inode, &this);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
return __lookup_hash(&this, base, NULL);
}
@@ -1898,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
struct path *path)
{
struct nameidata nd;
- char *tmp = getname(name);
+ char *tmp = getname_flags(name, flags);
int err = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
@@ -2078,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
}
-int may_open(struct path *path, int acc_mode, int flag)
+static int may_open(struct path *path, int acc_mode, int flag)
{
struct dentry *dentry = path->dentry;
struct inode *inode = dentry->d_inode;
int error;
+ /* O_PATH? */
+ if (!acc_mode)
+ return 0;
+
if (!inode)
return -ENOENT;
@@ -2152,34 +2059,6 @@ static int handle_truncate(struct file *filp)
}
/*
- * Be careful about ever adding any more callers of this
- * function. Its flags must be in the namei format, not
- * what get passed to sys_open().
- */
-static int __open_namei_create(struct nameidata *nd, struct path *path,
- int open_flag, int mode)
-{
- int error;
- struct dentry *dir = nd->path.dentry;
-
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&nd->path, path->dentry, mode, 0);
- if (error)
- goto out_unlock;
- error = vfs_create(dir->d_inode, path->dentry, mode, nd);
-out_unlock:
- mutex_unlock(&dir->d_inode->i_mutex);
- dput(nd->path.dentry);
- nd->path.dentry = path->dentry;
-
- if (error)
- return error;
- /* Don't check for write permission, don't truncate */
- return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
-}
-
-/*
* Note that while the flag value (low two bits) for sys_open means:
* 00 - read-only
* 01 - write-only
@@ -2203,126 +2082,115 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int open_will_truncate(int flag, struct inode *inode)
-{
- /*
- * We'll never write to the fs underlying
- * a device file.
- */
- if (special_file(inode->i_mode))
- return 0;
- return (flag & O_TRUNC);
-}
-
-static struct file *finish_open(struct nameidata *nd,
- int open_flag, int acc_mode)
-{
- struct file *filp;
- int will_truncate;
- int error;
-
- will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
- if (will_truncate) {
- error = mnt_want_write(nd->path.mnt);
- if (error)
- goto exit;
- }
- error = may_open(&nd->path, acc_mode, open_flag);
- if (error) {
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- if (!IS_ERR(filp)) {
- if (will_truncate) {
- error = handle_truncate(filp);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- }
- /*
- * It is now safe to drop the mnt write
- * because the filp has had a write taken
- * on its behalf.
- */
- if (will_truncate)
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- return filp;
-
-exit:
- path_put(&nd->path);
- return ERR_PTR(error);
-}
-
/*
- * Handle O_CREAT case for do_filp_open
+ * Handle the last step of open()
*/
static struct file *do_last(struct nameidata *nd, struct path *path,
- int open_flag, int acc_mode,
- int mode, const char *pathname)
+ const struct open_flags *op, const char *pathname)
{
struct dentry *dir = nd->path.dentry;
+ struct dentry *dentry;
+ int open_flag = op->open_flag;
+ int will_truncate = open_flag & O_TRUNC;
+ int want_write = 0;
+ int acc_mode = op->acc_mode;
struct file *filp;
- int error = -EISDIR;
+ int error;
+
+ nd->flags &= ~LOOKUP_PARENT;
+ nd->flags |= op->intent;
switch (nd->last_type) {
case LAST_DOTDOT:
- follow_dotdot(nd);
- dir = nd->path.dentry;
case LAST_DOT:
- if (need_reval_dot(dir)) {
- int status = d_revalidate(nd->path.dentry, nd);
- if (!status)
- status = -ESTALE;
- if (status < 0) {
- error = status;
- goto exit;
- }
- }
+ error = handle_dots(nd, nd->last_type);
+ if (error)
+ return ERR_PTR(error);
/* fallthrough */
case LAST_ROOT:
- goto exit;
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
+ audit_inode(pathname, nd->path.dentry);
+ if (open_flag & O_CREAT) {
+ error = -EISDIR;
+ goto exit;
+ }
+ goto ok;
case LAST_BIND:
+ /* can't be RCU mode here */
+ error = handle_reval_path(nd);
+ if (error)
+ goto exit;
audit_inode(pathname, dir);
goto ok;
}
+ if (!(open_flag & O_CREAT)) {
+ int symlink_ok = 0;
+ if (nd->last.name[nd->last.len])
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+ if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
+ symlink_ok = 1;
+ /* we _can_ be in RCU mode here */
+ error = walk_component(nd, path, &nd->last, LAST_NORM,
+ !symlink_ok);
+ if (error < 0)
+ return ERR_PTR(error);
+ if (error) /* symlink */
+ return NULL;
+ /* sayonara */
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ error = -ENOTDIR;
+ if (nd->flags & LOOKUP_DIRECTORY) {
+ if (!nd->inode->i_op->lookup)
+ goto exit;
+ }
+ audit_inode(pathname, nd->path.dentry);
+ goto ok;
+ }
+
+ /* create side of things */
+
+ if (nd->flags & LOOKUP_RCU) {
+ if (nameidata_drop_rcu_last(nd))
+ return ERR_PTR(-ECHILD);
+ }
+
+ audit_inode(pathname, dir);
+ error = -EISDIR;
/* trailing slashes? */
if (nd->last.name[nd->last.len])
goto exit;
mutex_lock(&dir->d_inode->i_mutex);
- path->dentry = lookup_hash(nd);
- path->mnt = nd->path.mnt;
-
- error = PTR_ERR(path->dentry);
- if (IS_ERR(path->dentry)) {
+ dentry = lookup_hash(nd);
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
mutex_unlock(&dir->d_inode->i_mutex);
goto exit;
}
- if (IS_ERR(nd->intent.open.file)) {
- error = PTR_ERR(nd->intent.open.file);
- goto exit_mutex_unlock;
- }
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
/* Negative dentry, just create the file */
- if (!path->dentry->d_inode) {
+ if (!dentry->d_inode) {
+ int mode = op->mode;
+ if (!IS_POSIXACL(dir->d_inode))
+ mode &= ~current_umask();
/*
* This write is needed to ensure that a
- * ro->rw transition does not occur between
+ * rw->ro transition does not occur between
* the time when the file is created and when
* a permanent write count is taken through
* the 'struct file' in nameidata_to_filp().
@@ -2330,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
error = mnt_want_write(nd->path.mnt);
if (error)
goto exit_mutex_unlock;
- error = __open_namei_create(nd, path, open_flag, mode);
- if (error) {
- mnt_drop_write(nd->path.mnt);
- goto exit;
- }
- filp = nameidata_to_filp(nd);
- mnt_drop_write(nd->path.mnt);
- path_put(&nd->path);
- if (!IS_ERR(filp)) {
- error = ima_file_check(filp, acc_mode);
- if (error) {
- fput(filp);
- filp = ERR_PTR(error);
- }
- }
- return filp;
+ want_write = 1;
+ /* Don't check for write permission, don't truncate */
+ open_flag &= ~O_TRUNC;
+ will_truncate = 0;
+ acc_mode = MAY_OPEN;
+ error = security_path_mknod(&nd->path, dentry, mode, 0);
+ if (error)
+ goto exit_mutex_unlock;
+ error = vfs_create(dir->d_inode, dentry, mode, nd);
+ if (error)
+ goto exit_mutex_unlock;
+ mutex_unlock(&dir->d_inode->i_mutex);
+ dput(nd->path.dentry);
+ nd->path.dentry = dentry;
+ goto common;
}
/*
@@ -2375,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
- filp = finish_open(nd, open_flag, acc_mode);
+ if (!S_ISREG(nd->inode->i_mode))
+ will_truncate = 0;
+
+ if (will_truncate) {
+ error = mnt_want_write(nd->path.mnt);
+ if (error)
+ goto exit;
+ want_write = 1;
+ }
+common:
+ error = may_open(&nd->path, acc_mode, open_flag);
+ if (error)
+ goto exit;
+ filp = nameidata_to_filp(nd);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, op->acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ if (!IS_ERR(filp)) {
+ if (will_truncate) {
+ error = handle_truncate(filp);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ }
+out:
+ if (want_write)
+ mnt_drop_write(nd->path.mnt);
+ path_put(&nd->path);
return filp;
exit_mutex_unlock:
@@ -2383,197 +2283,103 @@ exit_mutex_unlock:
exit_dput:
path_put_conditional(path, nd);
exit:
- path_put(&nd->path);
- return ERR_PTR(error);
+ filp = ERR_PTR(error);
+ goto out;
}
-/*
- * Note that the low bits of the passed in "open_flag"
- * are not the same as in the local variable "flag". See
- * open_to_namei_flags() for more details.
- */
-struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode)
+static struct file *path_openat(int dfd, const char *pathname,
+ struct nameidata *nd, const struct open_flags *op, int flags)
{
+ struct file *base = NULL;
struct file *filp;
- struct nameidata nd;
- int error;
struct path path;
- int count = 0;
- int flag = open_to_namei_flags(open_flag);
- int flags;
-
- if (!(open_flag & O_CREAT))
- mode = 0;
-
- /* Must never be set by userspace */
- open_flag &= ~FMODE_NONOTIFY;
-
- /*
- * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
- * check for O_DSYNC if the need any syncing at all we enforce it's
- * always set instead of having to deal with possibly weird behaviour
- * for malicious applications setting only __O_SYNC.
- */
- if (open_flag & __O_SYNC)
- open_flag |= O_DSYNC;
-
- if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(open_flag);
-
- /* O_TRUNC implies we need access checks for write permissions */
- if (open_flag & O_TRUNC)
- acc_mode |= MAY_WRITE;
-
- /* Allow the LSM permission hook to distinguish append
- access from general write access. */
- if (open_flag & O_APPEND)
- acc_mode |= MAY_APPEND;
-
- flags = LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- flags |= LOOKUP_FOLLOW;
+ int error;
filp = get_empty_filp();
if (!filp)
return ERR_PTR(-ENFILE);
- filp->f_flags = open_flag;
- nd.intent.open.file = filp;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
-
- if (open_flag & O_CREAT)
- goto creat;
+ filp->f_flags = op->open_flag;
+ nd->intent.open.file = filp;
+ nd->intent.open.flags = open_to_namei_flags(op->open_flag);
+ nd->intent.open.create_mode = op->mode;
- /* !O_CREAT, simple open */
- error = do_path_lookup(dfd, pathname, flags, &nd);
+ error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
if (unlikely(error))
goto out_filp;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW)) {
- if (nd.inode->i_op->follow_link)
- goto out_path;
- }
- error = -ENOTDIR;
- if (nd.flags & LOOKUP_DIRECTORY) {
- if (!nd.inode->i_op->lookup)
- goto out_path;
- }
- audit_inode(pathname, nd.path.dentry);
- filp = finish_open(&nd, open_flag, acc_mode);
- release_open_intent(&nd);
- return filp;
-creat:
- /* OK, have to create the file. Find the parent. */
- error = path_init_rcu(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
- error = path_walk_rcu(pathname, &nd);
- path_finish_rcu(&nd);
- if (unlikely(error == -ECHILD || error == -ESTALE)) {
- /* slower, locked walk */
- if (error == -ESTALE) {
-reval:
- flags |= LOOKUP_REVAL;
- }
- error = path_init(dfd, pathname,
- LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
- if (error)
- goto out_filp;
-
- error = path_walk_simple(pathname, &nd);
- }
+ current->total_link_count = 0;
+ error = link_path_walk(pathname, nd);
if (unlikely(error))
goto out_filp;
- if (unlikely(!audit_dummy_context()))
- audit_inode(pathname, nd.path.dentry);
- /*
- * We have the parent and last component.
- */
- nd.flags = flags;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
+ filp = do_last(nd, &path, op, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path link = path;
- struct inode *linki = link.dentry->d_inode;
void *cookie;
- error = -ELOOP;
- if (!(nd.flags & LOOKUP_FOLLOW))
- goto exit_dput;
- if (count++ == 32)
- goto exit_dput;
- /*
- * This is subtle. Instead of calling do_follow_link() we do
- * the thing by hands. The reason is that this way we have zero
- * link_count and path_walk() (called from ->follow_link)
- * honoring LOOKUP_PARENT. After that we have the parent and
- * last component, i.e. we are in the same situation as after
- * the first path_walk(). Well, almost - if the last component
- * is normal we get its copy stored in nd->last.name and we will
- * have to putname() it when we are done. Procfs-like symlinks
- * just set LAST_BIND.
- */
- nd.flags |= LOOKUP_PARENT;
- error = security_inode_follow_link(link.dentry, &nd);
- if (error)
- goto exit_dput;
- error = __do_follow_link(&link, &nd, &cookie);
- if (unlikely(error)) {
- if (!IS_ERR(cookie) && linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- /* nd.path had been dropped */
- nd.path = link;
- goto out_path;
+ if (!(nd->flags & LOOKUP_FOLLOW)) {
+ path_put_conditional(&path, nd);
+ path_put(&nd->path);
+ filp = ERR_PTR(-ELOOP);
+ break;
}
- nd.flags &= ~LOOKUP_PARENT;
- filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (linki->i_op->put_link)
- linki->i_op->put_link(link.dentry, &nd, cookie);
- path_put(&link);
+ nd->flags |= LOOKUP_PARENT;
+ nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
+ error = follow_link(&link, nd, &cookie);
+ if (unlikely(error))
+ filp = ERR_PTR(error);
+ else
+ filp = do_last(nd, &path, op, pathname);
+ put_link(nd, &link, cookie);
}
out:
- if (nd.root.mnt)
- path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
- goto reval;
- release_open_intent(&nd);
+ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
+ path_put(&nd->root);
+ if (base)
+ fput(base);
+ release_open_intent(nd);
return filp;
-exit_dput:
- path_put_conditional(&path, &nd);
-out_path:
- path_put(&nd.path);
out_filp:
filp = ERR_PTR(error);
goto out;
}
-/**
- * filp_open - open file and return file pointer
- *
- * @filename: path to open
- * @flags: open flags as per the open(2) second argument
- * @mode: mode for the new file if O_CREAT is set, else ignored
- *
- * This is the helper to open a file from kernelspace if you really
- * have to. But in generally you should not do this, so please move
- * along, nothing to see here..
- */
-struct file *filp_open(const char *filename, int flags, int mode)
+struct file *do_filp_open(int dfd, const char *pathname,
+ const struct open_flags *op, int flags)
+{
+ struct nameidata nd;
+ struct file *filp;
+
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(filp == ERR_PTR(-ECHILD)))
+ filp = path_openat(dfd, pathname, &nd, op, flags);
+ if (unlikely(filp == ERR_PTR(-ESTALE)))
+ filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
+ return filp;
+}
+
+struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *name, const struct open_flags *op, int flags)
{
- return do_filp_open(AT_FDCWD, filename, flags, mode, 0);
+ struct nameidata nd;
+ struct file *file;
+
+ nd.root.mnt = mnt;
+ nd.root.dentry = dentry;
+
+ flags |= LOOKUP_ROOT;
+
+ if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+ return ERR_PTR(-ELOOP);
+
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
+ if (unlikely(file == ERR_PTR(-ECHILD)))
+ file = path_openat(-1, name, &nd, op, flags);
+ if (unlikely(file == ERR_PTR(-ESTALE)))
+ file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
+ return file;
}
-EXPORT_SYMBOL(filp_open);
/**
* lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -3112,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
return error;
mutex_lock(&inode->i_mutex);
- error = dir->i_op->link(old_dentry, dir, new_dentry);
+ /* Make sure we don't allow creating a hardlink to an unlinked file */
+ if (inode->i_nlink == 0)
+ error = -ENOENT;
+ else
+ error = dir->i_op->link(old_dentry, dir, new_dentry);
mutex_unlock(&inode->i_mutex);
if (!error)
fsnotify_link(dir, inode, new_dentry);
@@ -3134,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
struct dentry *new_dentry;
struct nameidata nd;
struct path old_path;
+ int how = 0;
int error;
char *to;
- if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
+ if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
return -EINVAL;
+ /*
+ * To use null names we require CAP_DAC_READ_SEARCH.
+ * This ensures that not everyone will be able to create
+ * a hardlink using the passed file descriptor.
+ */
+ if (flags & AT_EMPTY_PATH) {
+ if (!capable(CAP_DAC_READ_SEARCH))
+ return -ENOENT;
+ how = LOOKUP_EMPTY;
+ }
+
+ if (flags & AT_SYMLINK_FOLLOW)
+ how |= LOOKUP_FOLLOW;
- error = user_path_at(olddfd, oldname,
- flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
- &old_path);
+ error = user_path_at(olddfd, oldname, how, &old_path);
if (error)
return error;
@@ -3579,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(path_lookup);
+EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b0b95371696..e96e03782def 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = {
.show = show_vfsmnt
};
+static int uuid_is_nil(u8 *uuid)
+{
+ int i;
+ u8 *cp = (u8 *)uuid;
+
+ for (i = 0; i < 16; i++) {
+ if (*cp++)
+ return 0;
+ }
+ return 1;
+}
+
static int show_mountinfo(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v)
if (IS_MNT_UNBINDABLE(mnt))
seq_puts(m, " unbindable");
+ if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
+ /* print the uuid */
+ seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
+
/* Filesystem specific data */
seq_puts(m, " - ");
show_type(m, sb);
@@ -1244,7 +1260,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
*/
br_write_lock(vfsmount_lock);
if (mnt_get_count(mnt) != 2) {
- br_write_lock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return -EBUSY;
}
br_write_unlock(vfsmount_lock);
@@ -1767,6 +1783,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ err = security_sb_remount(sb, data);
+ if (err)
+ return err;
+
down_write(&sb->s_umount);
if (flags & MS_BIND)
err = change_mount_flags(path->mnt, flags);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
*/
u64 nfs_compat_user_ino64(u64 fileid)
{
- int ino;
+#ifdef CONFIG_COMPAT
+ compat_ulong_t ino;
+#else
+ unsigned long ino;
+#endif
if (enable_ino64)
return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..1be36cf65bfc 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -298,6 +298,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+extern void nfs4_schedule_session_recovery(struct nfs4_session *);
+#else
+static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+}
#endif /* CONFIG_NFS_V4_1 */
extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +312,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
-extern void nfs4_schedule_state_recovery(struct nfs_client *);
+extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
-extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
-extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
+extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs41_handle_recall_slot(struct nfs_client *clp);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..b73c34375f60 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -219,6 +219,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
goto out_err;
}
buf = kmalloc(rlen + 1, GFP_KERNEL);
+ if (!buf) {
+ dprintk("%s: Not enough memory\n", __func__);
+ goto out_err;
+ }
buf[rlen] = '\0';
memcpy(buf, r_addr, rlen);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..0a07e353a961 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -256,12 +256,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -272,7 +273,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
exception->retry = 1;
break;
#endif /* defined(CONFIG_NFS_V4_1) */
@@ -295,8 +296,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
-do_state_recovery:
- nfs4_schedule_state_recovery(clp);
+wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
if (ret == 0)
exception->retry = 1;
@@ -435,8 +435,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
clp = res->sr_session->clp;
do_renew_lease(clp, timestamp);
/* Check sequence flags */
- if (atomic_read(&clp->cl_count) > 1)
- nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
+ if (res->sr_status_flags != 0)
+ nfs4_schedule_lease_recovery(clp);
break;
case -NFS4ERR_DELAY:
/* The server detected a resend of the RPC call and
@@ -1255,14 +1255,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(
- server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
/* Don't recall a delegation if it was lost */
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_lease_recovery(server->nfs_client);
goto out;
case -ERESTARTSYS:
/*
@@ -1271,7 +1270,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
*/
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
* User RPCSEC_GSS context has expired.
@@ -1587,7 +1586,7 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
!test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
break;
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
ret = -EIO;
}
return ret;
@@ -3178,7 +3177,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
if (task->tk_status < 0) {
/* Unless we're shutting down, schedule state recovery! */
if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
return;
}
do_renew_lease(clp, timestamp);
@@ -3252,6 +3251,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
}
}
+static int buf_to_pages_noslab(const void *buf, size_t buflen,
+ struct page **pages, unsigned int *pgbase)
+{
+ struct page *newpage, **spages;
+ int rc = 0;
+ size_t len;
+ spages = pages;
+
+ do {
+ len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
+ newpage = alloc_page(GFP_KERNEL);
+
+ if (newpage == NULL)
+ goto unwind;
+ memcpy(page_address(newpage), buf, len);
+ buf += len;
+ buflen -= len;
+ *pages++ = newpage;
+ rc++;
+ } while (buflen != 0);
+
+ return rc;
+
+unwind:
+ for(; rc > 0; rc--)
+ __free_page(spages[rc-1]);
+ return -ENOMEM;
+}
+
struct nfs4_cached_acl {
int cached;
size_t len;
@@ -3420,13 +3448,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
.rpc_argp = &arg,
.rpc_resp = &res,
};
- int ret;
+ int ret, i;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
+ i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
+ if (i < 0)
+ return i;
nfs_inode_return_delegation(inode);
- buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+
+ /*
+ * Free each page after tx, so the only ref left is
+ * held by the network stack
+ */
+ for (; i > 0; i--)
+ put_page(pages[i-1]);
+
/*
* Acl update can result in inode attribute update.
* so mark the attribute cache invalid.
@@ -3464,12 +3502,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
- nfs4_state_mark_reclaim_nograce(clp, state);
- goto do_state_recovery;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- goto do_state_recovery;
+ nfs4_schedule_lease_recovery(clp);
+ goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3480,7 +3519,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_session_recovery(clp->cl_session);
task->tk_status = 0;
return -EAGAIN;
#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3536,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
-do_state_recovery:
+wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
@@ -4110,7 +4148,7 @@ static void nfs4_lock_release(void *calldata)
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
data->arg.lock_seqid);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("%s: cancelling lock!\n", __func__);
} else
nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4172,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
{
- struct nfs_client *clp = server->nfs_client;
- struct nfs4_state *state = lsp->ls_state;
-
switch (error) {
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_EXPIRED:
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
(lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_nograce(clp, state);
- lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
- if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
- nfs4_state_mark_reclaim_reboot(clp, state);
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
};
}
@@ -4366,12 +4399,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_EXPIRED:
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ goto out;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_DEADSESSION:
- nfs4_schedule_state_recovery(server->nfs_client);
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session);
goto out;
case -ERESTARTSYS:
/*
@@ -4381,7 +4416,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
- nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+ nfs4_schedule_stateid_recovery(server, state);
err = 0;
goto out;
case -EKEYEXPIRED:
@@ -4988,10 +5023,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ long timeout = 0;
+ int err;
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
- status = _nfs4_proc_create_session(clp);
+ do {
+ status = _nfs4_proc_create_session(clp);
+ if (status == -NFS4ERR_DELAY) {
+ err = nfs4_delay(clp->cl_rpcclient, &timeout);
+ if (err)
+ status = err;
+ }
+ } while (status == -NFS4ERR_DELAY);
+
if (status)
goto out;
@@ -5100,7 +5145,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5187,7 +5232,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if (IS_ERR(task))
ret = PTR_ERR(task);
else
- rpc_put_task(task);
+ rpc_put_task_async(task);
dprintk("<-- %s status=%d\n", __func__, ret);
return ret;
}
@@ -5203,8 +5248,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
goto out;
}
ret = rpc_wait_for_completion_task(task);
- if (!ret)
+ if (!ret) {
+ struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
+
+ if (task->tk_status == 0)
+ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
ret = task->tk_status;
+ }
rpc_put_task(task);
out:
dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5291,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
return -EAGAIN;
default:
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_lease_recovery(clp);
}
return 0;
}
@@ -5309,6 +5359,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
status = PTR_ERR(task);
goto out;
}
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status == 0)
+ status = task->tk_status;
rpc_put_task(task);
return 0;
out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..0592288f9f06 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1007,9 +1007,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
}
/*
- * Schedule a state recovery attempt
+ * Schedule a lease recovery attempt
*/
-void nfs4_schedule_state_recovery(struct nfs_client *clp)
+void nfs4_schedule_lease_recovery(struct nfs_client *clp)
{
if (!clp)
return;
@@ -1018,7 +1018,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1032,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
return 1;
}
-int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1041,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
return 1;
}
+void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
+{
+ struct nfs_client *clp = server->nfs_client;
+
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ nfs4_schedule_state_manager(clp);
+}
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1436,10 +1444,15 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
}
#ifdef CONFIG_NFS_V4_1
+void nfs4_schedule_session_recovery(struct nfs4_session *session)
+{
+ nfs4_schedule_lease_recovery(session->clp);
+}
+
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1460,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
clp->cl_boot_time = CURRENT_TIME;
nfs4_state_start_reclaim_nograce(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1455,7 +1468,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
{
if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
nfs4_state_start_reclaim_reboot(clp);
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -1475,7 +1488,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
{
nfs_expire_all_delegations(clp);
if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
- nfs4_schedule_state_recovery(clp);
+ nfs4_schedule_state_manager(clp);
}
void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..94d50e86a124 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1660,7 +1660,7 @@ static void encode_create_session(struct xdr_stream *xdr,
p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
*p++ = cpu_to_be32(OP_CREATE_SESSION);
- p = xdr_encode_hyper(p, clp->cl_ex_clid);
+ p = xdr_encode_hyper(p, clp->cl_clientid);
*p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
*p++ = cpu_to_be32(args->flags); /*flags */
@@ -4694,7 +4694,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &clp->cl_ex_clid);
+ xdr_decode_hyper(p, &clp->cl_clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
goto out_overflow;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
/* Default path we try to mount. "%s" gets replaced by our IP address */
#define NFS_ROOT "/tftpboot/%s"
+/* Default NFSROOT mount options. */
+#define NFS_DEF_OPTIONS "udp"
+
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
/* Text-based mount options passed to super.c */
-static char nfs_root_options[256] __initdata = "";
+static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
/* Address of NFS server */
static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
}
static int __init root_nfs_cat(char *dest, const char *src,
- const size_t destlen)
+ const size_t destlen)
{
+ size_t len = strlen(dest);
+
+ if (len && dest[len - 1] != ',')
+ if (strlcat(dest, ",", destlen) > destlen)
+ return -1;
+
if (strlcat(dest, src, destlen) > destlen)
return -1;
return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
if (root_nfs_cat(nfs_root_options, incoming,
sizeof(nfs_root_options)))
return -1;
-
- /*
- * Possibly prepare for more options to be appended
- */
- if (nfs_root_options[0] != '\0' &&
- nfs_root_options[strlen(nfs_root_options)] != ',')
- if (root_nfs_cat(nfs_root_options, ",",
- sizeof(nfs_root_options)))
- return -1;
-
return 0;
}
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
*/
static int __init root_nfs_data(char *cmdline)
{
- char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+ char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
int len, retval = -1;
char *tmp = NULL;
const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
* Append mandatory options for nfsroot so they override
* what has come before
*/
- snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+ snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
&servaddr);
- if (root_nfs_cat(nfs_root_options, addr_option,
+ if (root_nfs_cat(nfs_root_options, mand_options,
sizeof(nfs_root_options)))
goto out_optionstoolong;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..6481d537d69d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -180,7 +180,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
task_setup_data.rpc_client = NFS_CLIENT(dir);
task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
- rpc_put_task(task);
+ rpc_put_task_async(task);
return 1;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..42b92d7a9cc4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1292,6 +1292,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (how & FLUSH_SYNC)
+ rpc_wait_for_completion_task(task);
rpc_put_task(task);
return 0;
}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
static struct file *do_open(char *name, int flags)
{
- struct nameidata nd;
struct vfsmount *mnt;
- int error;
+ struct file *file;
mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
if (IS_ERR(mnt))
return (struct file *)mnt;
- error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd);
- mntput(mnt); /* drop do_kern_mount reference */
- if (error)
- return ERR_PTR(error);
-
- if (flags == O_RDWR)
- error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
- else
- error = may_open(&nd.path, MAY_WRITE, flags);
+ file = file_open_root(mnt->mnt_root, mnt, name, flags);
- if (!error)
- return dentry_open(nd.path.dentry, nd.path.mnt, flags,
- current_cred());
-
- path_put(&nd.path);
- return ERR_PTR(error);
+ mntput(mnt); /* drop do_kern_mount reference */
+ return file;
}
static struct {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cde36cb0f348..02eb4edf0ece 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -432,7 +432,7 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 54b60bfceb8d..7b566ec14e18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2445,15 +2445,16 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
- struct nfs4_delegation *dp = NULL;
+ struct nfs4_delegation *dp;
spin_lock(&recall_lock);
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
- if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
- break;
- }
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid) {
+ spin_unlock(&recall_lock);
+ return dp;
+ }
spin_unlock(&recall_lock);
- return dp;
+ return NULL;
}
int share_access_to_flags(u32 share_access)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 956629b9cdc9..615f0a9f0600 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -317,8 +317,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
- if ((host_err = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
- goto out_nfserr;
+ if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ return status;
iattr->ia_valid |= ATTR_UID;
}
if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
@@ -328,8 +328,8 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
- if ((host_err = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
- goto out_nfserr;
+ if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ return status;
iattr->ia_valid |= ATTR_GID;
}
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
@@ -1142,7 +1142,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
u32 dummy;
char *machine_name;
- int i;
+ int i, j;
int nr_secflavs;
READ_BUF(16);
@@ -1215,7 +1215,7 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(4);
READ32(dummy);
READ_BUF(dummy * 4);
- for (i = 0; i < dummy; ++i)
+ for (j = 0; j < dummy; ++j)
READ32(dummy);
break;
case RPC_AUTH_GSS:
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 388e9e8f5286..85f7baa15f5d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -35,11 +35,6 @@
#include "btnode.h"
-void nilfs_btnode_cache_init_once(struct address_space *btnc)
-{
- nilfs_mapping_init_once(btnc);
-}
-
static const struct address_space_operations def_btnode_aops = {
.sync_page = block_sync_page,
};
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 79037494f1e0..1b8ebd888c28 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
struct buffer_head *newbh;
};
-void nilfs_btnode_cache_init_once(struct address_space *);
void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 6a0e2a189f60..a0babd2bff6a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -454,9 +454,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
struct backing_dev_info *bdi = inode->i_sb->s_bdi;
INIT_LIST_HEAD(&shadow->frozen_buffers);
- nilfs_mapping_init_once(&shadow->frozen_data);
+ address_space_init_once(&shadow->frozen_data);
nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops);
- nilfs_mapping_init_once(&shadow->frozen_btnodes);
+ address_space_init_once(&shadow->frozen_btnodes);
nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops);
mi->mi_shadow = shadow;
return 0;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..161791d26458 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inc_nlink(old_inode);
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= NILFS_LINK_MAX)
goto out_dir;
}
- inc_nlink(old_inode);
err = nilfs_add_link(new_dentry, old_inode);
- if (err) {
- drop_nlink(old_inode);
- nilfs_mark_inode_dirty(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de) {
inc_nlink(new_dir);
nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_inode->i_ctime = CURRENT_TIME;
nilfs_delete_entry(old_de, old_page);
- drop_nlink(old_inode);
if (dir_de) {
nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0c432416cfef..a585b35fd6bc 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -492,19 +492,6 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
return nc;
}
-void nilfs_mapping_init_once(struct address_space *mapping)
-{
- memset(mapping, 0, sizeof(*mapping));
- INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
- spin_lock_init(&mapping->tree_lock);
- INIT_LIST_HEAD(&mapping->private_list);
- spin_lock_init(&mapping->private_lock);
-
- spin_lock_init(&mapping->i_mmap_lock);
- INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
- INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
-}
-
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops)
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index 622df27cd891..2a00953ebd5f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -61,7 +61,6 @@ void nilfs_free_private_page(struct page *);
int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
void nilfs_copy_back_pages(struct address_space *, struct address_space *);
void nilfs_clear_dirty_pages(struct address_space *);
-void nilfs_mapping_init_once(struct address_space *mapping);
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi,
const struct address_space_operations *aops);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..2de9f636792a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -430,7 +430,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
nilfs_segctor_map_segsum_entry(
sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
- if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
+ if (NILFS_I(inode)->i_root &&
+ !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
/* skip finfo */
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 58fd707174e1..1673b3d99842 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1279,7 +1279,7 @@ static void nilfs_inode_init_once(void *obj)
#ifdef CONFIG_NILFS_XATTR
init_rwsem(&ii->xattr_sem);
#endif
- nilfs_btnode_cache_init_once(&ii->i_btnode_cache);
+ address_space_init_once(&ii->i_btnode_cache);
ii->i_bmap = &ii->i_bmap_data;
inode_init_once(&ii->vfs_inode);
}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
int ret = 0; /* if all else fails, just return false */
struct ocfs2_super *osb;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
inode = dentry->d_inode;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
dentry->d_name.len, dentry->d_name.name,
fh, len, connectable);
- if (len < 3 || (connectable && len < 6)) {
- mlog(ML_ERROR, "fh buffer is too small for encoding\n");
+ if (connectable && (len < 6)) {
+ *max_len = 6;
+ type = 255;
+ goto bail;
+ } else if (len < 3) {
+ *max_len = 3;
type = 255;
goto bail;
}
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 43e56b97f9c0..6180da1e37e6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -405,9 +405,9 @@ static inline int ocfs2_remove_extent_credits(struct super_block *sb)
ocfs2_quota_trans_credits(sb);
}
-/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
- * bitmap block for the new bit) dx_root update for free list */
-#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2 + 1)
+/* data block for new dir/symlink, allocation of directory block, dx_root
+ * update for free list */
+#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + OCFS2_SUBALLOC_ALLOC + 1)
static inline int ocfs2_add_dir_index_credits(struct super_block *sb)
{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 849fb4a2e814..d6c25d76b537 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -293,7 +293,7 @@ static int ocfs2_mknod(struct inode *dir,
}
/* get security xattr */
- status = ocfs2_init_security_get(inode, dir, &si);
+ status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
@@ -1665,7 +1665,7 @@ static int ocfs2_symlink(struct inode *dir,
}
/* get security xattr */
- status = ocfs2_init_security_get(inode, dir, &si);
+ status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
if (status) {
if (status == -EOPNOTSUPP)
si.enable = 0;
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95d..d5ab56cbe5c5 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
extern const struct dquot_operations ocfs2_quota_operations;
extern struct quota_format_type ocfs2_quota_format;
-int ocfs2_quota_setup(void);
-void ocfs2_quota_shutdown(void);
-
#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24c..a73f64166481 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
* write to gf
*/
-static struct workqueue_struct *ocfs2_quota_wq = NULL;
-
static void qsync_work_fn(struct work_struct *work);
static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
OCFS2_QBLK_RESERVED_SPACE;
oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
out_err:
mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
struct super_block *sb = oinfo->dqi_gqinode->i_sb;
dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
- queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
- msecs_to_jiffies(oinfo->dqi_syncms));
+ schedule_delayed_work(&oinfo->dqi_sync_work,
+ msecs_to_jiffies(oinfo->dqi_syncms));
}
/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
.alloc_dquot = ocfs2_alloc_dquot,
.destroy_dquot = ocfs2_destroy_dquot,
};
-
-int ocfs2_quota_setup(void)
-{
- ocfs2_quota_wq = create_workqueue("o2quot");
- if (!ocfs2_quota_wq)
- return -ENOMEM;
- return 0;
-}
-
-void ocfs2_quota_shutdown(void)
-{
- if (ocfs2_quota_wq) {
- flush_workqueue(ocfs2_quota_wq);
- destroy_workqueue(ocfs2_quota_wq);
- ocfs2_quota_wq = NULL;
- }
-}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index b5f9160e93e9..c384d634872a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3228,7 +3228,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
u32 num_clusters, unsigned int e_flags)
{
int ret, delete, index, credits = 0;
- u32 new_bit, new_len;
+ u32 new_bit, new_len, orig_num_clusters;
unsigned int set_len;
struct ocfs2_super *osb = OCFS2_SB(sb);
handle_t *handle;
@@ -3261,6 +3261,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
goto out;
}
+ orig_num_clusters = num_clusters;
+
while (num_clusters) {
ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
p_cluster, num_clusters,
@@ -3348,7 +3350,8 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
* in write-back mode.
*/
if (context->get_clusters == ocfs2_di_get_clusters) {
- ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
+ ret = ocfs2_cow_sync_writeback(sb, context, cpos,
+ orig_num_clusters);
if (ret)
mlog_errno(ret);
}
@@ -4325,7 +4328,8 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
- error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
+ error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
+ &new_dentry->d_name);
if (error)
mlog_errno(error);
}
@@ -4376,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
if (IS_ERR(s))
return PTR_ERR(s);
- error = path_lookup(s, LOOKUP_PARENT, nd);
+ error = kern_path_parent(s, nd);
if (error)
putname(s);
else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 38f986d2447e..236ed1bdca2c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1316,7 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb,
struct mount_options *mopt,
int is_remount)
{
- int status;
+ int status, user_stack = 0;
char *p;
u32 tmp;
@@ -1459,6 +1459,15 @@ static int ocfs2_parse_options(struct super_block *sb,
memcpy(mopt->cluster_stack, args[0].from,
OCFS2_STACK_LABEL_LEN);
mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have
+ * an osb to pass to
+ * ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ user_stack = 1;
break;
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
@@ -1514,13 +1523,16 @@ static int ocfs2_parse_options(struct super_block *sb,
}
}
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
+ if (user_stack == 0) {
+ /* Ensure only one heartbeat mode */
+ tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ status = 0;
+ goto bail;
+ }
}
status = 1;
@@ -1645,16 +1657,11 @@ static int __init ocfs2_init(void)
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
}
- status = ocfs2_quota_setup();
- if (status)
- goto leave;
-
ocfs2_set_locking_protocol();
status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
- ocfs2_quota_shutdown();
ocfs2_free_mem_caches();
exit_ocfs2_uptodate_cache();
}
@@ -1671,8 +1678,6 @@ static void __exit ocfs2_exit(void)
{
mlog_entry_void();
- ocfs2_quota_shutdown();
-
if (ocfs2_wq) {
flush_workqueue(ocfs2_wq);
destroy_workqueue(ocfs2_wq);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67cd43914641..6bb602486c6b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,7 +7185,8 @@ out:
* must not hold any lock expect i_mutex.
*/
int ocfs2_init_security_and_acl(struct inode *dir,
- struct inode *inode)
+ struct inode *inode,
+ const struct qstr *qstr)
{
int ret = 0;
struct buffer_head *dir_bh = NULL;
@@ -7193,7 +7194,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
.enable = 1,
};
- ret = ocfs2_init_security_get(inode, dir, &si);
+ ret = ocfs2_init_security_get(inode, dir, qstr, &si);
if (!ret) {
ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
si.name, si.value, si.value_len,
@@ -7261,13 +7262,14 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
int ocfs2_init_security_get(struct inode *inode,
struct inode *dir,
+ const struct qstr *qstr,
struct ocfs2_security_xattr_info *si)
{
/* check whether ocfs2 support feature xattr */
if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
return -EOPNOTSUPP;
- return security_inode_init_security(inode, dir, &si->name, &si->value,
- &si->value_len);
+ return security_inode_init_security(inode, dir, qstr, &si->name,
+ &si->value, &si->value_len);
}
int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index aa64bb37a65b..d63cfb72316b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,6 +57,7 @@ int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
struct ocfs2_dinode *di);
int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
int ocfs2_init_security_get(struct inode *, struct inode *,
+ const struct qstr *,
struct ocfs2_security_xattr_info *);
int ocfs2_init_security_set(handle_t *, struct inode *,
struct buffer_head *,
@@ -94,5 +95,6 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
struct buffer_head *new_bh,
bool preserve_security);
int ocfs2_init_security_and_acl(struct inode *dir,
- struct inode *inode);
+ struct inode *inode,
+ const struct qstr *qstr);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..f83ca80cc59a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
+
+ /* It's not possible punch hole on append only file */
+ if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ return -EPERM;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
/*
* Revalidate the write permissions, in case security policy has
* changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
{
struct path path;
int error = -EINVAL;
- int follow;
+ int lookup_flags;
- if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
goto out;
- follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
- error = user_path_at(dfd, filename, follow, &path);
+ lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+ error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
goto out;
error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
+ static const struct file_operations empty_fops = {};
struct inode *inode;
int error;
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
+
+ if (unlikely(f->f_flags & O_PATH))
+ f->f_mode = FMODE_PATH;
+
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.dentry = dentry;
f->f_path.mnt = mnt;
f->f_pos = 0;
- f->f_op = fops_get(inode->i_fop);
file_sb_list_add(f, inode->i_sb);
+ if (unlikely(f->f_mode & FMODE_PATH)) {
+ f->f_op = &empty_fops;
+ return f;
+ }
+
+ f->f_op = fops_get(inode->i_fop);
+
error = security_dentry_open(f, cred);
if (error)
goto cleanup_all;
@@ -693,7 +714,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
if (error)
goto cleanup_all;
}
- ima_counts_get(f);
+ if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
+ i_readcount_inc(inode);
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -882,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file)
EXPORT_SYMBOL(fd_install);
+static inline int build_open_flags(int flags, int mode, struct open_flags *op)
+{
+ int lookup_flags = 0;
+ int acc_mode;
+
+ if (!(flags & O_CREAT))
+ mode = 0;
+ op->mode = mode;
+
+ /* Must never be set by userspace */
+ flags &= ~FMODE_NONOTIFY;
+
+ /*
+ * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
+ * check for O_DSYNC if the need any syncing at all we enforce it's
+ * always set instead of having to deal with possibly weird behaviour
+ * for malicious applications setting only __O_SYNC.
+ */
+ if (flags & __O_SYNC)
+ flags |= O_DSYNC;
+
+ /*
+ * If we have O_PATH in the open flag. Then we
+ * cannot have anything other than the below set of flags
+ */
+ if (flags & O_PATH) {
+ flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
+ acc_mode = 0;
+ } else {
+ acc_mode = MAY_OPEN | ACC_MODE(flags);
+ }
+
+ op->open_flag = flags;
+
+ /* O_TRUNC implies we need access checks for write permissions */
+ if (flags & O_TRUNC)
+ acc_mode |= MAY_WRITE;
+
+ /* Allow the LSM permission hook to distinguish append
+ access from general write access. */
+ if (flags & O_APPEND)
+ acc_mode |= MAY_APPEND;
+
+ op->acc_mode = acc_mode;
+
+ op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
+
+ if (flags & O_CREAT) {
+ op->intent |= LOOKUP_CREATE;
+ if (flags & O_EXCL)
+ op->intent |= LOOKUP_EXCL;
+ }
+
+ if (flags & O_DIRECTORY)
+ lookup_flags |= LOOKUP_DIRECTORY;
+ if (!(flags & O_NOFOLLOW))
+ lookup_flags |= LOOKUP_FOLLOW;
+ return lookup_flags;
+}
+
+/**
+ * filp_open - open file and return file pointer
+ *
+ * @filename: path to open
+ * @flags: open flags as per the open(2) second argument
+ * @mode: mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to. But in generally you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
+ return do_filp_open(AT_FDCWD, filename, &op, lookup);
+}
+EXPORT_SYMBOL(filp_open);
+
+struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
+ const char *filename, int flags)
+{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, 0, &op);
+ if (flags & O_CREAT)
+ return ERR_PTR(-EINVAL);
+ if (!filename && (flags & O_DIRECTORY))
+ if (!dentry->d_inode->i_op->lookup)
+ return ERR_PTR(-ENOTDIR);
+ return do_file_open_root(dentry, mnt, filename, &op, lookup);
+}
+EXPORT_SYMBOL(file_open_root);
+
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
+ struct open_flags op;
+ int lookup = build_open_flags(flags, mode, &op);
char *tmp = getname(filename);
int fd = PTR_ERR(tmp);
if (!IS_ERR(tmp)) {
fd = get_unused_fd_flags(flags);
if (fd >= 0) {
- struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
+ struct file *f = do_filp_open(dfd, tmp, &op, lookup);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
@@ -960,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id)
if (filp->f_op && filp->f_op->flush)
retval = filp->f_op->flush(filp, id);
- dnotify_flush(filp, id);
- locks_remove_posix(filp, id);
+ if (likely(!(filp->f_mode & FMODE_PATH))) {
+ dnotify_flush(filp, id);
+ locks_remove_posix(filp, id);
+ }
fput(filp);
return retval;
}
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 789c625c7aa5..b10e3540d5b7 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -251,6 +251,11 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
}
vm->vblk_size = get_unaligned_be32(data + 0x08);
+ if (vm->vblk_size == 0) {
+ ldm_error ("Illegal VBLK size");
+ return false;
+ }
+
vm->vblk_offset = get_unaligned_be32(data + 0x0C);
vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
index 68d6a216ee79..11f688bd76c5 100644
--- a/fs/partitions/mac.c
+++ b/fs/partitions/mac.c
@@ -29,10 +29,9 @@ static inline void mac_fix_string(char *stg, int len)
int mac_partition(struct parsed_partitions *state)
{
- int slot = 1;
Sector sect;
unsigned char *data;
- int blk, blocks_in_map;
+ int slot, blocks_in_map;
unsigned secsize;
#ifdef CONFIG_PPC_PMAC
int found_root = 0;
@@ -59,10 +58,14 @@ int mac_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0; /* not a MacOS disk */
}
- strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
blocks_in_map = be32_to_cpu(part->map_count);
- for (blk = 1; blk <= blocks_in_map; ++blk) {
- int pos = blk * secsize;
+ if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
+ for (slot = 1; slot <= blocks_in_map; ++slot) {
+ int pos = slot * secsize;
put_dev_sector(sect);
data = read_part_sector(state, pos/512, &sect);
if (!data)
@@ -113,13 +116,11 @@ int mac_partition(struct parsed_partitions *state)
}
if (goodness > found_root_goodness) {
- found_root = blk;
+ found_root = slot;
found_root_goodness = goodness;
}
}
#endif /* CONFIG_PPC_PMAC */
-
- ++slot;
}
#ifdef CONFIG_PPC_PMAC
if (found_root_goodness)
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..764b86a01965 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
#include "check.h"
#include "osf.h"
+#define MAX_OSF_PARTITIONS 18
+
int osf_partition(struct parsed_partitions *state)
{
int i;
int slot = 1;
+ unsigned int npartitions;
Sector sect;
unsigned char *data;
struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
u8 p_fstype;
u8 p_frag;
__le16 p_cpg;
- } d_partitions[8];
+ } d_partitions[MAX_OSF_PARTITIONS];
} * label;
struct d_partition * partition;
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
put_dev_sector(sect);
return 0;
}
- for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) {
+ npartitions = le16_to_cpu(label->d_npartitions);
+ if (npartitions > MAX_OSF_PARTITIONS) {
+ put_dev_sector(sect);
+ return 0;
+ }
+ for (i = 0 ; i < npartitions; i++, partition++) {
if (slot == state->limit)
break;
if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..d49c4b5d2c3e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2620,35 +2620,6 @@ static const struct pid_entry proc_base_stuff[] = {
&proc_self_inode_operations, NULL, {}),
};
-/*
- * Exceptional case: normally we are not allowed to unhash a busy
- * directory. In this case, however, we can do it - no aliasing problems
- * due to the way we treat inodes.
- */
-static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode;
- struct task_struct *task;
-
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
-
- inode = dentry->d_inode;
- task = get_proc_task(inode);
- if (task) {
- put_task_struct(task);
- return 1;
- }
- d_drop(dentry);
- return 0;
-}
-
-static const struct dentry_operations proc_base_dentry_operations =
-{
- .d_revalidate = proc_base_revalidate,
- .d_delete = pid_delete_dentry,
-};
-
static struct dentry *proc_base_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2685,7 +2656,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d6a7ca1fdac5 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
static void proc_evict_inode(struct inode *inode)
{
struct proc_dir_entry *de;
+ struct ctl_table_header *head;
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
@@ -38,8 +39,11 @@ static void proc_evict_inode(struct inode *inode)
de = PROC_I(inode)->pde;
if (de)
pde_put(de);
- if (PROC_I(inode)->sysctl)
- sysctl_head_put(PROC_I(inode)->sysctl);
+ head = PROC_I(inode)->sysctl;
+ if (head) {
+ rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
+ sysctl_head_put(head);
+ }
}
struct vfsmount *proc_mnt;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
- printk(KERN_ERR "/proc/device-tree: can't find root\n");
+ pr_debug("/proc/device-tree: can't find root\n");
return;
}
proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..f50133c11c24 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -32,7 +32,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
ei->sysctl_entry = table;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
inode->i_mode = table->mode;
if (!table->child) {
inode->i_mode |= S_IFREG;
@@ -408,15 +407,18 @@ static int proc_sys_compare(const struct dentry *parent,
const struct dentry *dentry, const struct inode *inode,
unsigned int len, const char *str, const struct qstr *name)
{
+ struct ctl_table_header *head;
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
+ /* AV: can it, indeed? */
if (!inode)
- return 0;
+ return 1;
if (name->len != len)
return 1;
if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(inode)->sysctl);
+ head = rcu_dereference(PROC_I(inode)->sysctl);
+ return !head || !sysctl_is_seen(head);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
struct inode *inode = dentry->d_inode;
int maxlen = *lenp;
- if (maxlen < 3)
+ if (need_parent && (maxlen < 5)) {
+ *lenp = 5;
return 255;
+ } else if (maxlen < 3) {
+ *lenp = 3;
+ return 255;
+ }
data[0] = inode->i_ino;
data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e6990..c77514bd5776 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
reiserfs_mounted_fs_count++;
if (reiserfs_mounted_fs_count <= 1) {
reiserfs_write_unlock(sb);
- commit_wq = create_workqueue("reiserfs");
+ commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
reiserfs_write_lock(sb);
}
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..118662690cdf 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,7 +593,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -667,7 +667,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -747,7 +747,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
new_inode_init(inode, dir, mode);
jbegin_count += reiserfs_cache_default_acl(dir);
- retval = reiserfs_security_init(dir, inode, &security);
+ retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
dentry, inode, &security);
if (retval) {
- dir->i_nlink--;
+ DEC_DIR_INODE_NLINK(dir)
goto out_failed;
}
@@ -1032,7 +1032,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
}
new_inode_init(inode, parent_dir, mode);
- retval = reiserfs_security_init(parent_dir, inode, &security);
+ retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
+ &security);
if (retval < 0) {
drop_new_inode(inode);
return retval;
@@ -1122,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
reiserfs_write_unlock(dir->i_sb);
return -EMLINK;
}
- if (inode->i_nlink == 0) {
- reiserfs_write_unlock(dir->i_sb);
- return -ENOENT;
- }
/* inc before scheduling so reiserfs_unlink knows we are here */
inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- if (nd->flags & LOOKUP_RCU)
- return -ECHILD;
return -EPERM;
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 237c6928d3c6..ef66c18a9332 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -54,6 +54,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
* of blocks needed for the transaction. If successful, reiserfs_security
* must be released using reiserfs_security_free when the caller is done. */
int reiserfs_security_init(struct inode *dir, struct inode *inode,
+ const struct qstr *qstr,
struct reiserfs_security_handle *sec)
{
int blocks = 0;
@@ -65,7 +66,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
if (IS_PRIVATE(dir))
return 0;
- error = security_inode_init_security(inode, dir, &sec->name,
+ error = security_inode_init_security(inode, dir, qstr, &sec->name,
&sec->value, &sec->length);
if (error) {
if (error == -EOPNOTSUPP)
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
int error = -EINVAL;
int lookup_flags = 0;
- if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0)
+ if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
+ AT_EMPTY_PATH)) != 0)
goto out;
if (!(flag & AT_SYMLINK_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
if (flag & AT_NO_AUTOMOUNT)
lookup_flags |= LOOKUP_NO_AUTOMOUNT;
+ if (flag & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
if (bufsiz <= 0)
return -EINVAL;
- error = user_path_at(dfd, pathname, 0, &path);
+ error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
if (!error) {
struct inode *inode = path.dentry->d_inode;
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
}
EXPORT_SYMBOL(vfs_statfs);
-static int do_statfs_native(struct path *path, struct statfs *buf)
+int user_statfs(const char __user *pathname, struct kstatfs *st)
{
- struct kstatfs st;
- int retval;
+ struct path path;
+ int error = user_path(pathname, &path);
+ if (!error) {
+ error = vfs_statfs(&path, st);
+ path_put(&path);
+ }
+ return error;
+}
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
+int fd_statfs(int fd, struct kstatfs *st)
+{
+ struct file *file = fget(fd);
+ int error = -EBADF;
+ if (file) {
+ error = vfs_statfs(&file->f_path, st);
+ fput(file);
+ }
+ return error;
+}
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
+{
+ struct statfs buf;
+
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- if (sizeof buf->f_blocks == 4) {
- if ((st.f_blocks | st.f_bfree | st.f_bavail |
- st.f_bsize | st.f_frsize) &
+ if (sizeof buf.f_blocks == 4) {
+ if ((st->f_blocks | st->f_bfree | st->f_bavail |
+ st->f_bsize | st->f_frsize) &
0xffffffff00000000ULL)
return -EOVERFLOW;
/*
* f_files and f_ffree may be -1; it's okay to stuff
* that into 32 bits
*/
- if (st.f_files != -1 &&
- (st.f_files & 0xffffffff00000000ULL))
+ if (st->f_files != -1 &&
+ (st->f_files & 0xffffffff00000000ULL))
return -EOVERFLOW;
- if (st.f_ffree != -1 &&
- (st.f_ffree & 0xffffffff00000000ULL))
+ if (st->f_ffree != -1 &&
+ (st->f_ffree & 0xffffffff00000000ULL))
return -EOVERFLOW;
}
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
-static int do_statfs64(struct path *path, struct statfs64 *buf)
+static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
{
- struct kstatfs st;
- int retval;
-
- retval = vfs_statfs(path, &st);
- if (retval)
- return retval;
-
- if (sizeof(*buf) == sizeof(st))
- memcpy(buf, &st, sizeof(st));
+ struct statfs64 buf;
+ if (sizeof(buf) == sizeof(*st))
+ memcpy(&buf, st, sizeof(*st));
else {
- buf->f_type = st.f_type;
- buf->f_bsize = st.f_bsize;
- buf->f_blocks = st.f_blocks;
- buf->f_bfree = st.f_bfree;
- buf->f_bavail = st.f_bavail;
- buf->f_files = st.f_files;
- buf->f_ffree = st.f_ffree;
- buf->f_fsid = st.f_fsid;
- buf->f_namelen = st.f_namelen;
- buf->f_frsize = st.f_frsize;
- buf->f_flags = st.f_flags;
- memset(buf->f_spare, 0, sizeof(buf->f_spare));
+ buf.f_type = st->f_type;
+ buf.f_bsize = st->f_bsize;
+ buf.f_blocks = st->f_blocks;
+ buf.f_bfree = st->f_bfree;
+ buf.f_bavail = st->f_bavail;
+ buf.f_files = st->f_files;
+ buf.f_ffree = st->f_ffree;
+ buf.f_fsid = st->f_fsid;
+ buf.f_namelen = st->f_namelen;
+ buf.f_frsize = st->f_frsize;
+ buf.f_flags = st->f_flags;
+ memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
+ if (copy_to_user(p, &buf, sizeof(buf)))
+ return -EFAULT;
return 0;
}
SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
{
- struct path path;
- int error;
-
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs tmp;
- error = do_statfs_native(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ struct kstatfs st;
+ int error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
{
- struct path path;
- long error;
-
+ struct kstatfs st;
+ int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = user_path(pathname, &path);
- if (!error) {
- struct statfs64 tmp;
- error = do_statfs64(&path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- path_put(&path);
- }
+ error = user_statfs(pathname, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
{
- struct file *file;
- struct statfs tmp;
- int error;
-
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs_native(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ struct kstatfs st;
+ int error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs_native(&st, buf);
return error;
}
SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
{
- struct file *file;
- struct statfs64 tmp;
+ struct kstatfs st;
int error;
if (sz != sizeof(*buf))
return -EINVAL;
- error = -EBADF;
- file = fget(fd);
- if (!file)
- goto out;
- error = do_statfs64(&file->f_path, &tmp);
- if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
- error = -EFAULT;
- fput(file);
-out:
+ error = fd_statfs(fd, &st);
+ if (!error)
+ error = do_statfs64(&st, buf);
return error;
}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
new_de = sysv_find_entry(new_dentry, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
sysv_set_link(new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = sysv_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
sysv_delete_entry(old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
ubifs_assert(mutex_is_locked(&dir->i_mutex));
ubifs_assert(mutex_is_locked(&inode->i_mutex));
- /*
- * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
- * otherwise has the potential to corrupt the orphan inode list.
- *
- * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
- * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
- * lock 'dirA->i_mutex', so this is possible. Both of the functions
- * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
- * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
- * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
- * to the list of orphans. After this, 'vfs_link()' will link
- * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
- * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
- * to the list of orphans.
- */
- if (inode->i_nlink == 0)
- return -ENOENT;
-
err = dbg_check_synced_i_size(inode);
if (err)
return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
#include <linux/crc-itu-t.h>
#include <linux/exportfs.h>
+enum { UDF_MAX_LINKS = 0xffff };
+
static inline int udf_match(int len1, const unsigned char *name1, int len2,
const unsigned char *name2)
{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *iinfo;
err = -EMLINK;
- if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
+ if (dir->i_nlink >= UDF_MAX_LINKS)
goto out;
err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
struct fileIdentDesc cfi, *fi;
int err;
- if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
+ if (inode->i_nlink >= UDF_MAX_LINKS)
return -EMLINK;
- }
fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
retval = -EMLINK;
- if (!new_inode &&
- new_dir->i_nlink >=
- (256 << sizeof(new_dir->i_nlink)) - 1)
+ if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
goto end_rename;
}
if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
struct fid *fid = (struct fid *)fh;
int type = FILEID_UDF_WITHOUT_PARENT;
- if (len < 3 || (connectable && len < 5))
+ if (connectable && (len < 5)) {
+ *lenp = 5;
return 255;
+ } else if (len < 3) {
+ *lenp = 3;
+ return 255;
+ }
*lenp = 3;
fid->udf.block = location.logicalBlockNum;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..d6f681535eb8 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -306,7 +306,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
if (!new_de)
goto out_dir;
- inode_inc_link_count(old_inode);
ufs_set_link(new_dir, new_de, new_page, old_inode);
new_inode->i_ctime = CURRENT_TIME_SEC;
if (dir_de)
@@ -318,12 +317,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir->i_nlink >= UFS_LINK_MAX)
goto out_dir;
}
- inode_inc_link_count(old_inode);
err = ufs_add_link(new_dentry, old_inode);
- if (err) {
- inode_dec_link_count(old_inode);
+ if (err)
goto out_dir;
- }
if (dir_de)
inode_inc_link_count(new_dir);
}
@@ -331,12 +327,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* Like most other Unix systems, set the ctime for inodes on a
* rename.
- * inode_dec_link_count() will mark the inode dirty.
*/
old_inode->i_ctime = CURRENT_TIME_SEC;
ufs_delete_entry(old_dir, old_de, old_page);
- inode_dec_link_count(old_inode);
+ mark_inode_dirty(old_inode);
if (dir_de) {
ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378dd..f83a4c830a65 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -2022,11 +2022,12 @@ xfs_buf_init(void)
if (!xfslogd_workqueue)
goto out_free_buf_zone;
- xfsdatad_workqueue = create_workqueue("xfsdatad");
+ xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
- xfsconvertd_workqueue = create_workqueue("xfsconvertd");
+ xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
+ WQ_MEM_RECLAIM, 1);
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 05201ae719e5..d61611c88012 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -152,6 +152,8 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -XFS_ERROR(EPERM);
+ if (!blk_queue_discard(q))
+ return -XFS_ERROR(EOPNOTSUPP);
if (copy_from_user(&range, urange, sizeof(range)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
* seven combinations work. The real answer is "don't use v2".
*/
len = xfs_fileid_length(fileid_type);
- if (*max_len < len)
+ if (*max_len < len) {
+ *max_len = len;
return 255;
+ }
*max_len = len;
switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
xfs_mount_t *mp,
void __user *arg)
{
- xfs_fsop_geom_v1_t fsgeo;
+ xfs_fsop_geom_t fsgeo;
int error;
- error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3);
+ error = xfs_fs_geometry(mp, &fsgeo, 3);
if (error)
return -error;
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
+ /*
+ * Caller should have passed an argument of type
+ * xfs_fsop_geom_v1_t. This is a proper subset of the
+ * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
+ */
+ if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
return -XFS_ERROR(EFAULT);
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index bd5727852fd6..9ff7fc603d2f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -102,7 +102,8 @@ xfs_mark_inode_dirty(
STATIC int
xfs_init_security(
struct inode *inode,
- struct inode *dir)
+ struct inode *dir,
+ const struct qstr *qstr)
{
struct xfs_inode *ip = XFS_I(inode);
size_t length;
@@ -110,7 +111,7 @@ xfs_init_security(
unsigned char *name;
int error;
- error = security_inode_init_security(inode, dir, (char **)&name,
+ error = security_inode_init_security(inode, dir, qstr, (char **)&name,
&value, &length);
if (error) {
if (error == -EOPNOTSUPP)
@@ -194,7 +195,7 @@ xfs_vn_mknod(
inode = VFS_I(ip);
- error = xfs_init_security(inode, dir);
+ error = xfs_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
@@ -367,7 +368,7 @@ xfs_vn_symlink(
inode = VFS_I(cip);
- error = xfs_init_security(inode, dir);
+ error = xfs_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cec89dd5d7d2..85668efb3e3e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -53,6 +53,9 @@ xfs_fs_geometry(
xfs_fsop_geom_t *geo,
int new_version)
{
+
+ memset(geo, 0, sizeof(*geo));
+
geo->blocksize = mp->m_sb.sb_blocksize;
geo->rtextsize = mp->m_sb.sb_rextsize;
geo->agblocks = mp->m_sb.sb_agblocks;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb6..4aff56395732 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
if (!xfs_mru_elem_zone)
goto out;
- xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
+ xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
if (!xfs_mru_reap_wq)
goto out_destroy_mru_elem_zone;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 78ca429929f7..ff103ba96b78 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -381,7 +381,7 @@ struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
int acpi_disable_wakeup_device_power(struct acpi_device *dev);
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
int acpi_pm_device_sleep_state(struct device *, int *);
#else
static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 2bcc5c7c22a6..61e03dd7939e 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -30,6 +30,9 @@ typedef u64 cputime64_t;
#define cputime64_to_jiffies64(__ct) (__ct)
#define jiffies64_to_cputime64(__jif) (__jif)
#define cputime_to_cputime64(__ct) ((u64) __ct)
+#define cputime64_gt(__a, __b) ((__a) > (__b))
+
+#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
/*
diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h
index 0fc16e3f0bfc..84793c7025e2 100644
--- a/include/asm-generic/fcntl.h
+++ b/include/asm-generic/fcntl.h
@@ -80,6 +80,10 @@
#define O_SYNC (__O_SYNC|O_DSYNC)
#endif
+#ifndef O_PATH
+#define O_PATH 010000000
+#endif
+
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
#endif
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 3c2344f48136..01f227e14254 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -6,7 +6,7 @@
#include <asm/errno.h>
static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
@@ -16,7 +16,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+ if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
@@ -48,7 +48,8 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
}
static inline int
-futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
{
return -ENOSYS;
}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 31b6188df221..b4bfe338ea0e 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -4,6 +4,8 @@
#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU
+#include <linux/mm_types.h>
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index b3bfabc258f3..c1a1216e29ce 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -11,6 +11,7 @@ extern char _sinittext[], _einittext[];
extern char _end[];
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
+extern char __entry_text_start[], __entry_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h
index b969770196c2..57af0338d270 100644
--- a/include/asm-generic/unistd.h
+++ b/include/asm-generic/unistd.h
@@ -646,9 +646,13 @@ __SYSCALL(__NR_prlimit64, sys_prlimit64)
__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
#define __NR_fanotify_mark 263
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
+#define __NR_name_to_handle_at 264
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 265
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
#undef __NR_syscalls
-#define __NR_syscalls 264
+#define __NR_syscalls 266
/*
* All syscalls below here should go away really,
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fe77e3395b40..32c45e5fe0ab 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -15,7 +15,7 @@
* HEAD_TEXT_SECTION
* INIT_TEXT_SECTION(PAGE_SIZE)
* INIT_DATA_SECTION(...)
- * PERCPU(PAGE_SIZE)
+ * PERCPU(CACHELINE_SIZE, PAGE_SIZE)
* __init_end = .;
*
* _stext = .;
@@ -424,6 +424,12 @@
*(.kprobes.text) \
VMLINUX_SYMBOL(__kprobes_text_end) = .;
+#define ENTRY_TEXT \
+ ALIGN_FUNCTION(); \
+ VMLINUX_SYMBOL(__entry_text_start) = .; \
+ *(.entry.text) \
+ VMLINUX_SYMBOL(__entry_text_end) = .;
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define IRQENTRY_TEXT \
ALIGN_FUNCTION(); \
@@ -683,13 +689,18 @@
/**
* PERCPU_VADDR - define output section for percpu area
+ * @cacheline: cacheline size
* @vaddr: explicit base address (optional)
* @phdr: destination PHDR (optional)
*
- * Macro which expands to output section for percpu area. If @vaddr
- * is not blank, it specifies explicit base address and all percpu
- * symbols will be offset from the given address. If blank, @vaddr
- * always equals @laddr + LOAD_OFFSET.
+ * Macro which expands to output section for percpu area.
+ *
+ * @cacheline is used to align subsections to avoid false cacheline
+ * sharing between subsections for different purposes.
+ *
+ * If @vaddr is not blank, it specifies explicit base address and all
+ * percpu symbols will be offset from the given address. If blank,
+ * @vaddr always equals @laddr + LOAD_OFFSET.
*
* @phdr defines the output PHDR to use if not blank. Be warned that
* output PHDR is sticky. If @phdr is specified, the next output
@@ -700,7 +711,7 @@
* If there is no need to put the percpu section at a predetermined
* address, use PERCPU().
*/
-#define PERCPU_VADDR(vaddr, phdr) \
+#define PERCPU_VADDR(cacheline, vaddr, phdr) \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
- LOAD_OFFSET) { \
@@ -708,7 +719,9 @@
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
+ . = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
@@ -717,18 +730,18 @@
/**
* PERCPU - define output section for percpu area, simple version
+ * @cacheline: cacheline size
* @align: required alignment
*
- * Align to @align and outputs output section for percpu area. This
- * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
+ * Align to @align and outputs output section for percpu area. This macro
+ * doesn't manipulate @vaddr or @phdr and __per_cpu_load and
* __per_cpu_start will be identical.
*
- * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
- * that __per_cpu_load is defined as a relative symbol against
- * .data..percpu which is required for relocatable x86_32
- * configuration.
+ * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
+ * except that __per_cpu_load is defined as a relative symbol against
+ * .data..percpu which is required for relocatable x86_32 configuration.
*/
-#define PERCPU(align) \
+#define PERCPU(cacheline, align) \
. = ALIGN(align); \
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
VMLINUX_SYMBOL(__per_cpu_load) = .; \
@@ -736,7 +749,9 @@
*(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
*(.data..percpu..readmostly) \
+ . = ALIGN(cacheline); \
*(.data..percpu) \
*(.data..percpu..shared_aligned) \
VMLINUX_SYMBOL(__per_cpu_end) = .; \
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index fe29aadb129d..348843b80150 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1101,7 +1101,7 @@ struct drm_device {
struct platform_device *platformdev; /**< Platform device struture */
struct drm_sg_mem *sg; /**< Scatter gather memory */
- int num_crtcs; /**< Number of CRTCs on this device */
+ unsigned int num_crtcs; /**< Number of CRTCs on this device */
void *dev_private; /**< device private data */
void *mm_private;
struct address_space *dev_mapping;
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 5cb86c307f5d..fc4875433817 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -99,7 +99,6 @@ struct rxrpc_key_token {
* structure of raw payloads passed to add_key() or instantiate key
*/
struct rxrpc_key_data_v1 {
- u32 kif_version; /* 1 */
u16 security_index;
u16 ticket_length;
u32 expiry; /* time_t */
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 0c4929fa34d3..32df2b6ef0e0 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -89,6 +89,7 @@ enum {
ATA_ID_SPG = 98,
ATA_ID_LBA_CAPACITY_2 = 100,
ATA_ID_SECTOR_SIZE = 106,
+ ATA_ID_WWN = 108,
ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */
ATA_ID_LAST_LUN = 126,
ATA_ID_DLF = 128,
@@ -103,6 +104,7 @@ enum {
ATA_ID_SERNO_LEN = 20,
ATA_ID_FW_REV_LEN = 8,
ATA_ID_PROD_LEN = 40,
+ ATA_ID_WWN_LEN = 8,
ATA_PCI_CTL_OFS = 2,
@@ -598,42 +600,42 @@ static inline bool ata_id_has_dipm(const u16 *id)
}
-static inline int ata_id_has_fua(const u16 *id)
+static inline bool ata_id_has_fua(const u16 *id)
{
if ((id[ATA_ID_CFSSE] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_CFSSE] & (1 << 6);
}
-static inline int ata_id_has_flush(const u16 *id)
+static inline bool ata_id_has_flush(const u16 *id)
{
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_2] & (1 << 12);
}
-static inline int ata_id_flush_enabled(const u16 *id)
+static inline bool ata_id_flush_enabled(const u16 *id)
{
if (ata_id_has_flush(id) == 0)
- return 0;
+ return false;
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_CFS_ENABLE_2] & (1 << 12);
}
-static inline int ata_id_has_flush_ext(const u16 *id)
+static inline bool ata_id_has_flush_ext(const u16 *id)
{
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_2] & (1 << 13);
}
-static inline int ata_id_flush_ext_enabled(const u16 *id)
+static inline bool ata_id_flush_ext_enabled(const u16 *id)
{
if (ata_id_has_flush_ext(id) == 0)
- return 0;
+ return false;
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
/*
* some Maxtor disks have bit 13 defined incorrectly
* so check bit 10 too
@@ -686,64 +688,64 @@ static inline u16 ata_id_logical_sector_offset(const u16 *id,
return 0;
}
-static inline int ata_id_has_lba48(const u16 *id)
+static inline bool ata_id_has_lba48(const u16 *id)
{
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
if (!ata_id_u64(id, ATA_ID_LBA_CAPACITY_2))
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_2] & (1 << 10);
}
-static inline int ata_id_lba48_enabled(const u16 *id)
+static inline bool ata_id_lba48_enabled(const u16 *id)
{
if (ata_id_has_lba48(id) == 0)
- return 0;
+ return false;
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_CFS_ENABLE_2] & (1 << 10);
}
-static inline int ata_id_hpa_enabled(const u16 *id)
+static inline bool ata_id_hpa_enabled(const u16 *id)
{
/* Yes children, word 83 valid bits cover word 82 data */
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
/* And 87 covers 85-87 */
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
/* Check command sets enabled as well as supported */
if ((id[ATA_ID_CFS_ENABLE_1] & (1 << 10)) == 0)
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_1] & (1 << 10);
}
-static inline int ata_id_has_wcache(const u16 *id)
+static inline bool ata_id_has_wcache(const u16 *id)
{
/* Yes children, word 83 valid bits cover word 82 data */
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_1] & (1 << 5);
}
-static inline int ata_id_has_pm(const u16 *id)
+static inline bool ata_id_has_pm(const u16 *id)
{
if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_COMMAND_SET_1] & (1 << 3);
}
-static inline int ata_id_rahead_enabled(const u16 *id)
+static inline bool ata_id_rahead_enabled(const u16 *id)
{
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_CFS_ENABLE_1] & (1 << 6);
}
-static inline int ata_id_wcache_enabled(const u16 *id)
+static inline bool ata_id_wcache_enabled(const u16 *id)
{
if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[ATA_ID_CFS_ENABLE_1] & (1 << 5);
}
@@ -773,7 +775,7 @@ static inline unsigned int ata_id_major_version(const u16 *id)
return mver;
}
-static inline int ata_id_is_sata(const u16 *id)
+static inline bool ata_id_is_sata(const u16 *id)
{
/*
* See if word 93 is 0 AND drive is at least ATA-5 compatible
@@ -782,37 +784,40 @@ static inline int ata_id_is_sata(const u16 *id)
* 0x0000 and 0xffff along with the earlier ATA revisions...
*/
if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020)
- return 1;
- return 0;
+ return true;
+ return false;
}
-static inline int ata_id_has_tpm(const u16 *id)
+static inline bool ata_id_has_tpm(const u16 *id)
{
/* The TPM bits are only valid on ATA8 */
if (ata_id_major_version(id) < 8)
- return 0;
+ return false;
if ((id[48] & 0xC000) != 0x4000)
- return 0;
+ return false;
return id[48] & (1 << 0);
}
-static inline int ata_id_has_dword_io(const u16 *id)
+static inline bool ata_id_has_dword_io(const u16 *id)
{
/* ATA 8 reuses this flag for "trusted" computing */
if (ata_id_major_version(id) > 7)
- return 0;
- if (id[ATA_ID_DWORD_IO] & (1 << 0))
- return 1;
- return 0;
+ return false;
+ return id[ATA_ID_DWORD_IO] & (1 << 0);
}
-static inline int ata_id_has_unload(const u16 *id)
+static inline bool ata_id_has_unload(const u16 *id)
{
if (ata_id_major_version(id) >= 7 &&
(id[ATA_ID_CFSSE] & 0xC000) == 0x4000 &&
id[ATA_ID_CFSSE] & (1 << 13))
- return 1;
- return 0;
+ return true;
+ return false;
+}
+
+static inline bool ata_id_has_wwn(const u16 *id)
+{
+ return (id[ATA_ID_CSF_DEFAULT] & 0xC100) == 0x4100;
}
static inline int ata_id_form_factor(const u16 *id)
@@ -843,25 +848,25 @@ static inline int ata_id_rotation_rate(const u16 *id)
return val;
}
-static inline int ata_id_has_trim(const u16 *id)
+static inline bool ata_id_has_trim(const u16 *id)
{
if (ata_id_major_version(id) >= 7 &&
(id[ATA_ID_DATA_SET_MGMT] & 1))
- return 1;
- return 0;
+ return true;
+ return false;
}
-static inline int ata_id_has_zero_after_trim(const u16 *id)
+static inline bool ata_id_has_zero_after_trim(const u16 *id)
{
/* DSM supported, deterministic read, and read zero after trim set */
if (ata_id_has_trim(id) &&
(id[ATA_ID_ADDITIONAL_SUPP] & 0x4020) == 0x4020)
- return 1;
+ return true;
- return 0;
+ return false;
}
-static inline int ata_id_current_chs_valid(const u16 *id)
+static inline bool ata_id_current_chs_valid(const u16 *id)
{
/* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command
has not been issued to the device then the values of
@@ -873,11 +878,11 @@ static inline int ata_id_current_chs_valid(const u16 *id)
id[ATA_ID_CUR_SECTORS]; /* sectors in current translation */
}
-static inline int ata_id_is_cfa(const u16 *id)
+static inline bool ata_id_is_cfa(const u16 *id)
{
if ((id[ATA_ID_CONFIG] == 0x848A) || /* Traditional CF */
(id[ATA_ID_CONFIG] == 0x844A)) /* Delkin Devices CF */
- return 1;
+ return true;
/*
* CF specs don't require specific value in the word 0 anymore and yet
* they forbid to report the ATA version in the word 80 and require the
@@ -886,44 +891,40 @@ static inline int ata_id_is_cfa(const u16 *id)
* and while those that don't indicate CFA feature support need some
* sort of quirk list, it seems impractical for the ones that do...
*/
- if ((id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004)
- return 1;
- return 0;
+ return (id[ATA_ID_COMMAND_SET_2] & 0xC004) == 0x4004;
}
-static inline int ata_id_is_ssd(const u16 *id)
+static inline bool ata_id_is_ssd(const u16 *id)
{
return id[ATA_ID_ROT_SPEED] == 0x01;
}
-static inline int ata_id_pio_need_iordy(const u16 *id, const u8 pio)
+static inline bool ata_id_pio_need_iordy(const u16 *id, const u8 pio)
{
/* CF spec. r4.1 Table 22 says no IORDY on PIO5 and PIO6. */
if (pio > 4 && ata_id_is_cfa(id))
- return 0;
+ return false;
/* For PIO3 and higher it is mandatory. */
if (pio > 2)
- return 1;
+ return true;
/* Turn it on when possible. */
- if (ata_id_has_iordy(id))
- return 1;
- return 0;
+ return ata_id_has_iordy(id);
}
-static inline int ata_drive_40wire(const u16 *dev_id)
+static inline bool ata_drive_40wire(const u16 *dev_id)
{
if (ata_id_is_sata(dev_id))
- return 0; /* SATA */
+ return false; /* SATA */
if ((dev_id[ATA_ID_HW_CONFIG] & 0xE000) == 0x6000)
- return 0; /* 80 wire */
- return 1;
+ return false; /* 80 wire */
+ return true;
}
-static inline int ata_drive_40wire_relaxed(const u16 *dev_id)
+static inline bool ata_drive_40wire_relaxed(const u16 *dev_id)
{
if ((dev_id[ATA_ID_HW_CONFIG] & 0x2000) == 0x2000)
- return 0; /* 80 wire */
- return 1;
+ return false; /* 80 wire */
+ return true;
}
static inline int atapi_cdb_len(const u16 *dev_id)
@@ -936,12 +937,12 @@ static inline int atapi_cdb_len(const u16 *dev_id)
}
}
-static inline int atapi_command_packet_set(const u16 *dev_id)
+static inline bool atapi_command_packet_set(const u16 *dev_id)
{
return (dev_id[ATA_ID_CONFIG] >> 8) & 0x1f;
}
-static inline int atapi_id_dmadir(const u16 *dev_id)
+static inline bool atapi_id_dmadir(const u16 *dev_id)
{
return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000);
}
@@ -954,13 +955,13 @@ static inline int atapi_id_dmadir(const u16 *dev_id)
*
* It is called only once for each device.
*/
-static inline int ata_id_is_lba_capacity_ok(u16 *id)
+static inline bool ata_id_is_lba_capacity_ok(u16 *id)
{
unsigned long lba_sects, chs_sects, head, tail;
/* No non-LBA info .. so valid! */
if (id[ATA_ID_CYLS] == 0)
- return 1;
+ return true;
lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
@@ -975,13 +976,13 @@ static inline int ata_id_is_lba_capacity_ok(u16 *id)
id[ATA_ID_SECTORS] == 63 &&
(id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) &&
(lba_sects >= 16383 * 63 * id[ATA_ID_HEADS]))
- return 1;
+ return true;
chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS];
/* perform a rough sanity check on lba_sects: within 10% is OK */
if (lba_sects - chs_sects < chs_sects/10)
- return 1;
+ return true;
/* some drives have the word order reversed */
head = (lba_sects >> 16) & 0xffff;
@@ -990,10 +991,10 @@ static inline int ata_id_is_lba_capacity_ok(u16 *id)
if (lba_sects - chs_sects < chs_sects/10) {
*(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects);
- return 1; /* LBA capacity is (now) good */
+ return true; /* LBA capacity is (now) good */
}
- return 0; /* LBA capacity value may be bad */
+ return false; /* LBA capacity value may be bad */
}
static inline void ata_id_to_hd_driveid(u16 *id)
@@ -1051,19 +1052,19 @@ static inline int is_multi_taskfile(struct ata_taskfile *tf)
(tf->command == ATA_CMD_WRITE_MULTI_FUA_EXT);
}
-static inline int ata_ok(u8 status)
+static inline bool ata_ok(u8 status)
{
return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR))
== ATA_DRDY);
}
-static inline int lba_28_ok(u64 block, u32 n_block)
+static inline bool lba_28_ok(u64 block, u32 n_block)
{
/* check the ending block number: must be LESS THAN 0x0fffffff */
return ((block + n_block) < ((1 << 28) - 1)) && (n_block <= 256);
}
-static inline int lba_48_ok(u64 block, u32 n_block)
+static inline bool lba_48_ok(u64 block, u32 n_block)
{
/* check the ending block number */
return ((block + n_block - 1) < ((u64)1 << 48)) && (n_block <= 65536);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4d18ff34670a..d5063e1b5555 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -699,7 +699,7 @@ extern void blk_start_queue(struct request_queue *q);
extern void blk_stop_queue(struct request_queue *q);
extern void blk_sync_queue(struct request_queue *q);
extern void __blk_stop_queue(struct request_queue *q);
-extern void __blk_run_queue(struct request_queue *);
+extern void __blk_run_queue(struct request_queue *q, bool force_kblockd);
extern void blk_run_queue(struct request_queue *);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
@@ -1088,7 +1088,6 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*
@@ -1136,7 +1135,6 @@ static inline uint64_t rq_io_start_time_ns(struct request *req)
extern int blk_throtl_init(struct request_queue *q);
extern void blk_throtl_exit(struct request_queue *q);
extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
-extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
extern void throtl_shutdown_timer_wq(struct request_queue *q);
#else /* CONFIG_BLK_DEV_THROTTLING */
static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
@@ -1146,7 +1144,6 @@ static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
-static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
#endif /* CONFIG_BLK_DEV_THROTTLING */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3395cf7130f5..b22fb0d3db0f 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -245,7 +245,6 @@ static inline int blk_cmd_buf_len(struct request *rq)
extern void blk_dump_cmd(char *buf, struct request *rq);
extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
-extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index c3011beac30d..31d91a64838b 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -123,6 +123,7 @@ struct ceph_msg_pos {
#define SOCK_CLOSED 11 /* socket state changed to closed */
#define OPENING 13 /* open connection w/ (possibly new) peer */
#define DEAD 14 /* dead, about to kfree */
+#define BACKOFF 15
/*
* A single connection with another host.
@@ -160,7 +161,6 @@ struct ceph_connection {
struct list_head out_queue;
struct list_head out_sent; /* sending or sent but unacked */
u64 out_seq; /* last message queued for send */
- bool out_keepalive_pending;
u64 in_seq, in_seq_acked; /* last message received, acked */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ce104e33cd22..e654fa239916 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -474,7 +474,8 @@ struct cgroup_subsys {
struct cgroup *old_cgrp, struct task_struct *tsk,
bool threadgroup);
void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
- void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
+ void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task);
int (*populate)(struct cgroup_subsys *ss,
struct cgroup *cgrp);
void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
@@ -626,6 +627,7 @@ bool css_is_ancestor(struct cgroup_subsys_state *cg,
/* Get id and depth of css */
unsigned short css_id(struct cgroup_subsys_state *css);
unsigned short css_depth(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
#else /* !CONFIG_CGROUPS */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ccefff02b6cb..cdbfcb8780ec 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -65,4 +65,8 @@ SUBSYS(net_cls)
SUBSYS(blkio)
#endif
+#ifdef CONFIG_CGROUP_PERF
+SUBSYS(perf)
+#endif
+
/* */
diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index 68cd248f6d3e..66900e3c6eb1 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -101,8 +101,8 @@ struct ieee_pfc {
*/
struct dcb_app {
__u8 selector;
- __u32 protocol;
__u8 priority;
+ __u16 protocol;
};
struct dcbmsg {
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 597692f1fc8d..65970b811e22 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -34,7 +34,10 @@ struct debug_obj {
/**
* struct debug_obj_descr - object type specific debug description structure
+ *
* @name: name of the object typee
+ * @debug_hint: function returning an address which has an associated
+ * kernel symbol, to help identify the object
* @fixup_init: fixup function, which is called when the init check
* fails
* @fixup_activate: fixup function, which is called when the activate check
@@ -46,7 +49,7 @@ struct debug_obj {
*/
struct debug_obj_descr {
const char *name;
-
+ void *(*debug_hint) (void *addr);
int (*fixup_init) (void *addr, enum debug_obj_state state);
int (*fixup_activate) (void *addr, enum debug_obj_state state);
int (*fixup_destroy) (void *addr, enum debug_obj_state state);
diff --git a/include/linux/device.h b/include/linux/device.h
index 1bf5cf0b4513..dba775a68752 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -128,9 +128,7 @@ struct device_driver {
bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
-#if defined(CONFIG_OF)
const struct of_device_id *of_match_table;
-#endif
int (*probe) (struct device *dev);
int (*remove) (struct device *dev);
@@ -422,6 +420,7 @@ struct device {
void *platform_data; /* Platform specific data, device
core doesn't touch it */
struct dev_pm_info power;
+ struct dev_power_domain *pwr_domain;
#ifdef CONFIG_NUMA
int numa_node; /* NUMA node this device is close to */
@@ -441,9 +440,8 @@ struct device {
override */
/* arch specific additions */
struct dev_archdata archdata;
-#ifdef CONFIG_OF
- struct device_node *of_node;
-#endif
+
+ struct device_node *of_node; /* associated device tree node */
dev_t devt; /* dev_t, creates the sysfs "dev" */
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 28028988c862..33a42f24b275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -8,6 +8,9 @@ struct inode;
struct super_block;
struct vfsmount;
+/* limit the handle size to NFSv4 handle size now */
+#define MAX_HANDLE_SZ 128
+
/*
* The fileid_type identifies how the file within the filesystem is encoded.
* In theory this is freely set and parsed by the filesystem, but we try to
@@ -121,8 +124,10 @@ struct fid {
* set, the encode_fh() should store sufficient information so that a good
* attempt can be made to find not only the file but also it's place in the
* filesystem. This typically means storing a reference to de->d_parent in
- * the filehandle fragment. encode_fh() should return the number of bytes
- * stored or a negative error code such as %-ENOSPC
+ * the filehandle fragment. encode_fh() should return the fileid_type on
+ * success and on error returns 255 (if the space needed to encode fh is
+ * greater than @max_len*4 bytes). On error @max_len contains the minimum
+ * size (in 4-byte units) needed to encode the file handle.
*
* fh_to_dentry:
* @fh_to_dentry is given a &struct super_block (@sb) and a file handle
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 65990ef612f5..6043c64c207a 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -884,7 +884,8 @@ extern int ext3fs_dirhash(const char *name, int len, struct
dx_hash_info *hinfo);
/* ialloc.c */
-extern struct inode * ext3_new_inode (handle_t *, struct inode *, int);
+extern struct inode * ext3_new_inode (handle_t *, struct inode *,
+ const struct qstr *, int);
extern void ext3_free_inode (handle_t *, struct inode *);
extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
extern unsigned long ext3_count_free_inodes (struct super_block *);
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index a562fa5fb4e3..f550f894ba15 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -46,6 +46,7 @@
unlinking file. */
#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
#ifdef __KERNEL__
diff --git a/include/linux/file.h b/include/linux/file.h
index e85baebf6279..21a79958541c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed)
extern struct file *fget(unsigned int fd);
extern struct file *fget_light(unsigned int fd, int *fput_needed);
+extern struct file *fget_raw(unsigned int fd);
+extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
extern void set_close_on_exec(unsigned int fd, int flag);
extern void put_filp(struct file *);
extern int alloc_fd(unsigned start, unsigned flags);
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index da7e52b099f3..1effc8b56b4e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -109,7 +109,7 @@ static inline void freezer_count(void)
}
/*
- * Check if the task should be counted as freezeable by the freezer
+ * Check if the task should be counted as freezable by the freezer
*/
static inline int freezer_should_skip(struct task_struct *p)
{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bd3215940c37..2f5a71d6d766 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -102,6 +102,9 @@ struct inodes_stat_t {
/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
+/* File is opened with O_PATH; almost nothing can be done with it */
+#define FMODE_PATH ((__force fmode_t)0x4000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
@@ -649,6 +652,7 @@ struct address_space {
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
+ struct mutex unmap_mutex; /* to protect unmapping */
} __attribute__((aligned(sizeof(long))));
/*
* On most architectures that alignment is already the case; but
@@ -797,8 +801,7 @@ struct inode {
#endif
#ifdef CONFIG_IMA
- /* protected by i_lock */
- unsigned int i_readcount; /* struct files open RO */
+ atomic_t i_readcount; /* struct files open RO */
#endif
atomic_t i_writecount;
#ifdef CONFIG_SECURITY
@@ -977,6 +980,13 @@ struct file {
#endif
};
+struct file_handle {
+ __u32 handle_bytes;
+ int handle_type;
+ /* file identifier */
+ unsigned char f_handle[0];
+};
+
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
#define file_count(x) atomic_long_read(&(x)->f_count)
@@ -1400,6 +1410,7 @@ struct super_block {
wait_queue_head_t s_wait_unfrozen;
char s_id[32]; /* Informational name */
+ u8 s_uuid[16]; /* UUID */
void *s_fs_info; /* Filesystem private info */
fmode_t s_mode;
@@ -1873,6 +1884,8 @@ extern void drop_collected_mounts(struct vfsmount *);
extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
extern int vfs_statfs(struct path *, struct kstatfs *);
+extern int user_statfs(const char __user *, struct kstatfs *);
+extern int fd_statfs(int, struct kstatfs *);
extern int statfs_by_dentry(struct dentry *, struct kstatfs *);
extern int freeze_super(struct super_block *super);
extern int thaw_super(struct super_block *super);
@@ -1989,6 +2002,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset,
extern long do_sys_open(int dfd, const char __user *filename, int flags,
int mode);
extern struct file *filp_open(const char *, int, int);
+extern struct file *file_open_root(struct dentry *, struct vfsmount *,
+ const char *, int);
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
@@ -2139,7 +2154,7 @@ extern void check_disk_size_change(struct gendisk *disk,
struct block_device *bdev);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
-extern int __invalidate_device(struct block_device *);
+extern int __invalidate_device(struct block_device *, bool);
extern int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
@@ -2199,15 +2214,31 @@ static inline void allow_write_access(struct file *file)
if (file)
atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
}
+#ifdef CONFIG_IMA
+static inline void i_readcount_dec(struct inode *inode)
+{
+ BUG_ON(!atomic_read(&inode->i_readcount));
+ atomic_dec(&inode->i_readcount);
+}
+static inline void i_readcount_inc(struct inode *inode)
+{
+ atomic_inc(&inode->i_readcount);
+}
+#else
+static inline void i_readcount_dec(struct inode *inode)
+{
+ return;
+}
+static inline void i_readcount_inc(struct inode *inode)
+{
+ return;
+}
+#endif
extern int do_pipe_flags(int *, int);
extern struct file *create_read_pipe(struct file *f, int flags);
extern struct file *create_write_pipe(int flags);
extern void free_write_pipe(struct file *);
-extern struct file *do_filp_open(int dfd, const char *pathname,
- int open_flag, int mode, int acc_mode);
-extern int may_open(struct path *, int, int);
-
extern int kernel_read(struct file *, loff_t, char *, unsigned long);
extern struct file * open_exec(const char *);
@@ -2225,6 +2256,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
+extern void address_space_init_once(struct address_space *mapping);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
extern struct inode * igrab(struct inode *);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dcd6a7c3a435..ca29e03c1fac 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -428,6 +428,7 @@ extern void unregister_ftrace_graph(void);
extern void ftrace_graph_init_task(struct task_struct *t);
extern void ftrace_graph_exit_task(struct task_struct *t);
+extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
static inline int task_curr_ret_stack(struct task_struct *t)
{
@@ -451,6 +452,7 @@ static inline void unpause_graph_tracing(void)
static inline void ftrace_graph_init_task(struct task_struct *t) { }
static inline void ftrace_graph_exit_task(struct task_struct *t) { }
+static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { }
static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 47e3997f7b5c..22b32af1b5ec 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -37,7 +37,6 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
- int lock_depth;
};
#define FTRACE_MAX_EVENT \
@@ -208,7 +207,6 @@ struct ftrace_event_call {
#define PERF_MAX_TRACE_SIZE 2048
-#define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
extern void destroy_preds(struct ftrace_event_call *call);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 0b84c61607e8..dca31761b311 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -332,16 +332,19 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
return alloc_pages_current(gfp_mask, order);
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
- struct vm_area_struct *vma, unsigned long addr);
+ struct vm_area_struct *vma, unsigned long addr,
+ int node);
#else
#define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \
alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr)
+#define alloc_page_vma(gfp_mask, vma, addr) \
+ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+#define alloc_page_vma_node(gfp_mask, vma, addr, node) \
+ alloc_pages_vma(gfp_mask, 0, vma, addr, node)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f376ddc64c4d..62f500c724f9 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -54,11 +54,13 @@ enum hrtimer_restart {
* 0x00 inactive
* 0x01 enqueued into rbtree
* 0x02 callback function running
+ * 0x04 timer is migrated to another cpu
*
* Special cases:
* 0x03 callback function running and enqueued
* (was requeued on another CPU)
- * 0x09 timer was migrated on CPU hotunplug
+ * 0x05 timer was migrated on CPU hotunplug
+ *
* The "callback function running and enqueued" status is only possible on
* SMP. It happens for example when a posix timer expired and the callback
* queued a signal. Between dropping the lock which protects the posix timer
@@ -67,8 +69,11 @@ enum hrtimer_restart {
* as otherwise the timer could be removed before the softirq code finishes the
* the handling of the timer.
*
- * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state to
- * preserve the HRTIMER_STATE_CALLBACK bit in the above scenario.
+ * The HRTIMER_STATE_ENQUEUED bit is always or'ed to the current state
+ * to preserve the HRTIMER_STATE_CALLBACK in the above scenario. This
+ * also affects HRTIMER_STATE_MIGRATE where the preservation is not
+ * necessary. HRTIMER_STATE_MIGRATE is cleared after the timer is
+ * enqueued on the new cpu.
*
* All state transitions are protected by cpu_base->lock.
*/
@@ -148,7 +153,12 @@ struct hrtimer_clock_base {
#endif
};
-#define HRTIMER_MAX_CLOCK_BASES 2
+enum hrtimer_base_type {
+ HRTIMER_BASE_REALTIME,
+ HRTIMER_BASE_MONOTONIC,
+ HRTIMER_BASE_BOOTTIME,
+ HRTIMER_MAX_CLOCK_BASES,
+};
/*
* struct hrtimer_cpu_base - the per cpu clock bases
@@ -308,6 +318,7 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
extern ktime_t ktime_get(void);
extern ktime_t ktime_get_real(void);
+extern ktime_t ktime_get_boottime(void);
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
@@ -370,8 +381,9 @@ extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
extern ktime_t hrtimer_get_next_event(void);
/*
- * A timer is active, when it is enqueued into the rbtree or the callback
- * function is running.
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
*/
static inline int hrtimer_active(const struct hrtimer *timer)
{
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 903576df88dc..06a8d9c7de98 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -258,9 +258,7 @@ struct i2c_board_info {
unsigned short addr;
void *platform_data;
struct dev_archdata *archdata;
-#ifdef CONFIG_OF
struct device_node *of_node;
-#endif
int irq;
};
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 975837e7d6c0..09e6e62f9953 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -20,7 +20,6 @@ extern void ima_inode_free(struct inode *inode);
extern int ima_file_check(struct file *file, int mask);
extern void ima_file_free(struct file *file);
extern int ima_file_mmap(struct file *file, unsigned long prot);
-extern void ima_counts_get(struct file *file);
#else
static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -53,10 +52,5 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot)
return 0;
}
-static inline void ima_counts_get(struct file *file)
-{
- return;
-}
-
#endif /* CONFIG_IMA_H */
#endif /* _LINUX_IMA_H */
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 55e0d4253e49..59b72ca1c5d1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -14,6 +14,8 @@
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
@@ -55,7 +57,8 @@
* Used by threaded interrupts which need to keep the
* irq line disabled until the threaded handler has been run.
* IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
- *
+ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
+ * IRQF_NO_THREAD - Interrupt cannot be threaded
*/
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -67,22 +70,10 @@
#define IRQF_IRQPOLL 0x00001000
#define IRQF_ONESHOT 0x00002000
#define IRQF_NO_SUSPEND 0x00004000
+#define IRQF_FORCE_RESUME 0x00008000
+#define IRQF_NO_THREAD 0x00010000
-#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)
-
-/*
- * Bits used by threaded handlers:
- * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
- * IRQTF_DIED - handler thread died
- * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
- * IRQTF_AFFINITY - irq thread is requested to adjust affinity
- */
-enum {
- IRQTF_RUNTHREAD,
- IRQTF_DIED,
- IRQTF_WARNED,
- IRQTF_AFFINITY,
-};
+#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
/*
* These values can be returned by request_any_context_irq() and
@@ -110,6 +101,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *);
* @thread_fn: interupt handler function for threaded interrupts
* @thread: thread pointer for threaded interrupts
* @thread_flags: flags related to @thread
+ * @thread_mask: bitmask for keeping track of @thread activity
*/
struct irqaction {
irq_handler_t handler;
@@ -120,6 +112,7 @@ struct irqaction {
irq_handler_t thread_fn;
struct task_struct *thread;
unsigned long thread_flags;
+ unsigned long thread_mask;
const char *name;
struct proc_dir_entry *dir;
} ____cacheline_internodealigned_in_smp;
@@ -240,6 +233,35 @@ extern int irq_can_set_affinity(unsigned int irq);
extern int irq_select_affinity(unsigned int irq);
extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
+
+/**
+ * struct irq_affinity_notify - context for notification of IRQ affinity changes
+ * @irq: Interrupt to which notification applies
+ * @kref: Reference count, for internal use
+ * @work: Work item, for internal use
+ * @notify: Function to be called on change. This will be
+ * called in process context.
+ * @release: Function to be called on release. This will be
+ * called in process context. Once registered, the
+ * structure must only be freed when this function is
+ * called or later.
+ */
+struct irq_affinity_notify {
+ unsigned int irq;
+ struct kref kref;
+ struct work_struct work;
+ void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
+ void (*release)(struct kref *ref);
+};
+
+extern int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
+
+static inline void irq_run_affinity_notifiers(void)
+{
+ flush_scheduled_work();
+}
+
#else /* CONFIG_SMP */
static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -255,7 +277,7 @@ static inline int irq_can_set_affinity(unsigned int irq)
static inline int irq_select_affinity(unsigned int irq) { return 0; }
static inline int irq_set_affinity_hint(unsigned int irq,
- const struct cpumask *m)
+ const struct cpumask *m)
{
return -EINVAL;
}
@@ -314,16 +336,24 @@ static inline void enable_irq_lockdep_irqrestore(unsigned int irq, unsigned long
}
/* IRQ wakeup (PM) control: */
-extern int set_irq_wake(unsigned int irq, unsigned int on);
+extern int irq_set_irq_wake(unsigned int irq, unsigned int on);
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+/* Please do not use: Use the replacement functions instead */
+static inline int set_irq_wake(unsigned int irq, unsigned int on)
+{
+ return irq_set_irq_wake(irq, on);
+}
+#endif
static inline int enable_irq_wake(unsigned int irq)
{
- return set_irq_wake(irq, 1);
+ return irq_set_irq_wake(irq, 1);
}
static inline int disable_irq_wake(unsigned int irq)
{
- return set_irq_wake(irq, 0);
+ return irq_set_irq_wake(irq, 0);
}
#else /* !CONFIG_GENERIC_HARDIRQS */
@@ -353,6 +383,13 @@ static inline int disable_irq_wake(unsigned int irq)
}
#endif /* CONFIG_GENERIC_HARDIRQS */
+
+#ifdef CONFIG_IRQ_FORCED_THREADING
+extern bool force_irqthreads;
+#else
+#define force_irqthreads (0)
+#endif
+
#ifndef __ARCH_SET_SOFTIRQ_PENDING
#define set_softirq_pending(x) (local_softirq_pending() = (x))
#define or_softirq_pending(x) (local_softirq_pending() |= (x))
@@ -426,6 +463,13 @@ extern void raise_softirq(unsigned int nr);
*/
DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+
+static inline struct task_struct *this_cpu_ksoftirqd(void)
+{
+ return this_cpu_read(ksoftirqd);
+}
+
/* Try to send a softirq to a remote cpu. If this cannot be done, the
* work will be queued to the local cpu.
*/
@@ -645,6 +689,7 @@ static inline void init_irq_proc(void)
struct seq_file;
int show_interrupts(struct seq_file *p, void *v);
+int arch_show_interrupts(struct seq_file *p, int prec);
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 80fcb53057bc..1d3577f30d45 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -29,61 +29,104 @@
#include <asm/irq_regs.h>
struct irq_desc;
+struct irq_data;
typedef void (*irq_flow_handler_t)(unsigned int irq,
struct irq_desc *desc);
-
+typedef void (*irq_preflow_handler_t)(struct irq_data *data);
/*
* IRQ line status.
*
- * Bits 0-7 are reserved for the IRQF_* bits in linux/interrupt.h
+ * Bits 0-7 are the same as the IRQF_* bits in linux/interrupt.h
+ *
+ * IRQ_TYPE_NONE - default, unspecified type
+ * IRQ_TYPE_EDGE_RISING - rising edge triggered
+ * IRQ_TYPE_EDGE_FALLING - falling edge triggered
+ * IRQ_TYPE_EDGE_BOTH - rising and falling edge triggered
+ * IRQ_TYPE_LEVEL_HIGH - high level triggered
+ * IRQ_TYPE_LEVEL_LOW - low level triggered
+ * IRQ_TYPE_LEVEL_MASK - Mask to filter out the level bits
+ * IRQ_TYPE_SENSE_MASK - Mask for all the above bits
+ * IRQ_TYPE_PROBE - Special flag for probing in progress
+ *
+ * Bits which can be modified via irq_set/clear/modify_status_flags()
+ * IRQ_LEVEL - Interrupt is level type. Will be also
+ * updated in the code when the above trigger
+ * bits are modified via set_irq_type()
+ * IRQ_PER_CPU - Mark an interrupt PER_CPU. Will protect
+ * it from affinity setting
+ * IRQ_NOPROBE - Interrupt cannot be probed by autoprobing
+ * IRQ_NOREQUEST - Interrupt cannot be requested via
+ * request_irq()
+ * IRQ_NOAUTOEN - Interrupt is not automatically enabled in
+ * request/setup_irq()
+ * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set)
+ * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context
+ * IRQ_NESTED_THREAD - Interrupt nests into another thread
+ *
+ * Deprecated bits. They are kept updated as long as
+ * CONFIG_GENERIC_HARDIRQS_NO_COMPAT is not set. Will go away soon. These bits
+ * are internal state of the core code and if you really need to access
+ * them then talk to the genirq maintainer instead of hacking
+ * something weird.
*
- * IRQ types
*/
-#define IRQ_TYPE_NONE 0x00000000 /* Default, unspecified type */
-#define IRQ_TYPE_EDGE_RISING 0x00000001 /* Edge rising type */
-#define IRQ_TYPE_EDGE_FALLING 0x00000002 /* Edge falling type */
-#define IRQ_TYPE_EDGE_BOTH (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING)
-#define IRQ_TYPE_LEVEL_HIGH 0x00000004 /* Level high type */
-#define IRQ_TYPE_LEVEL_LOW 0x00000008 /* Level low type */
-#define IRQ_TYPE_SENSE_MASK 0x0000000f /* Mask of the above */
-#define IRQ_TYPE_PROBE 0x00000010 /* Probing in progress */
-
-/* Internal flags */
-#define IRQ_INPROGRESS 0x00000100 /* IRQ handler active - do not enter! */
-#define IRQ_DISABLED 0x00000200 /* IRQ disabled - do not enter! */
-#define IRQ_PENDING 0x00000400 /* IRQ pending - replay on enable */
-#define IRQ_REPLAY 0x00000800 /* IRQ has been replayed but not acked yet */
-#define IRQ_AUTODETECT 0x00001000 /* IRQ is being autodetected */
-#define IRQ_WAITING 0x00002000 /* IRQ not yet seen - for autodetection */
-#define IRQ_LEVEL 0x00004000 /* IRQ level triggered */
-#define IRQ_MASKED 0x00008000 /* IRQ masked - shouldn't be seen again */
-#define IRQ_PER_CPU 0x00010000 /* IRQ is per CPU */
-#define IRQ_NOPROBE 0x00020000 /* IRQ is not valid for probing */
-#define IRQ_NOREQUEST 0x00040000 /* IRQ cannot be requested */
-#define IRQ_NOAUTOEN 0x00080000 /* IRQ will not be enabled on request irq */
-#define IRQ_WAKEUP 0x00100000 /* IRQ triggers system wakeup */
-#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
-#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
-#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
-#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
-#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/
-#define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */
-#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */
-#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */
+enum {
+ IRQ_TYPE_NONE = 0x00000000,
+ IRQ_TYPE_EDGE_RISING = 0x00000001,
+ IRQ_TYPE_EDGE_FALLING = 0x00000002,
+ IRQ_TYPE_EDGE_BOTH = (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_EDGE_RISING),
+ IRQ_TYPE_LEVEL_HIGH = 0x00000004,
+ IRQ_TYPE_LEVEL_LOW = 0x00000008,
+ IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH),
+ IRQ_TYPE_SENSE_MASK = 0x0000000f,
+
+ IRQ_TYPE_PROBE = 0x00000010,
+
+ IRQ_LEVEL = (1 << 8),
+ IRQ_PER_CPU = (1 << 9),
+ IRQ_NOPROBE = (1 << 10),
+ IRQ_NOREQUEST = (1 << 11),
+ IRQ_NOAUTOEN = (1 << 12),
+ IRQ_NO_BALANCING = (1 << 13),
+ IRQ_MOVE_PCNTXT = (1 << 14),
+ IRQ_NESTED_THREAD = (1 << 15),
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+ IRQ_INPROGRESS = (1 << 16),
+ IRQ_REPLAY = (1 << 17),
+ IRQ_WAITING = (1 << 18),
+ IRQ_DISABLED = (1 << 19),
+ IRQ_PENDING = (1 << 20),
+ IRQ_MASKED = (1 << 21),
+ IRQ_MOVE_PENDING = (1 << 22),
+ IRQ_AFFINITY_SET = (1 << 23),
+ IRQ_WAKEUP = (1 << 24),
+#endif
+};
#define IRQF_MODIFY_MASK \
(IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \
IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \
- IRQ_PER_CPU)
+ IRQ_PER_CPU | IRQ_NESTED_THREAD)
-#ifdef CONFIG_IRQ_PER_CPU
-# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
-# define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
-#else
-# define CHECK_IRQ_PER_CPU(var) 0
-# define IRQ_NO_BALANCING_MASK IRQ_NO_BALANCING
-#endif
+#define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING)
+
+static inline __deprecated bool CHECK_IRQ_PER_CPU(unsigned int status)
+{
+ return status & IRQ_PER_CPU;
+}
+
+/*
+ * Return value for chip->irq_set_affinity()
+ *
+ * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity
+ * IRQ_SET_MASK_OK_NOCOPY - OK, chip did update irq_data.affinity
+ */
+enum {
+ IRQ_SET_MASK_OK = 0,
+ IRQ_SET_MASK_OK_NOCOPY,
+};
struct msi_desc;
@@ -91,6 +134,8 @@ struct msi_desc;
* struct irq_data - per irq and irq chip data passed down to chip functions
* @irq: interrupt number
* @node: node index useful for balancing
+ * @state_use_accessors: status information for irq chip functions.
+ * Use accessor functions to deal with it
* @chip: low level interrupt hardware access
* @handler_data: per-IRQ data for the irq_chip methods
* @chip_data: platform-specific per-chip private data for the chip
@@ -105,6 +150,7 @@ struct msi_desc;
struct irq_data {
unsigned int irq;
unsigned int node;
+ unsigned int state_use_accessors;
struct irq_chip *chip;
void *handler_data;
void *chip_data;
@@ -114,6 +160,80 @@ struct irq_data {
#endif
};
+/*
+ * Bit masks for irq_data.state_use_accessors
+ *
+ * IRQD_TRIGGER_MASK - Mask for the trigger type bits
+ * IRQD_SETAFFINITY_PENDING - Affinity setting is pending
+ * IRQD_NO_BALANCING - Balancing disabled for this IRQ
+ * IRQD_PER_CPU - Interrupt is per cpu
+ * IRQD_AFFINITY_SET - Interrupt affinity was set
+ * IRQD_LEVEL - Interrupt is level triggered
+ * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup
+ * from suspend
+ * IRQD_MOVE_PCNTXT - Interrupt can be moved in process
+ * context
+ */
+enum {
+ IRQD_TRIGGER_MASK = 0xf,
+ IRQD_SETAFFINITY_PENDING = (1 << 8),
+ IRQD_NO_BALANCING = (1 << 10),
+ IRQD_PER_CPU = (1 << 11),
+ IRQD_AFFINITY_SET = (1 << 12),
+ IRQD_LEVEL = (1 << 13),
+ IRQD_WAKEUP_STATE = (1 << 14),
+ IRQD_MOVE_PCNTXT = (1 << 15),
+};
+
+static inline bool irqd_is_setaffinity_pending(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_SETAFFINITY_PENDING;
+}
+
+static inline bool irqd_is_per_cpu(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_PER_CPU;
+}
+
+static inline bool irqd_can_balance(struct irq_data *d)
+{
+ return !(d->state_use_accessors & (IRQD_PER_CPU | IRQD_NO_BALANCING));
+}
+
+static inline bool irqd_affinity_was_set(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_AFFINITY_SET;
+}
+
+static inline u32 irqd_get_trigger_type(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_TRIGGER_MASK;
+}
+
+/*
+ * Must only be called inside irq_chip.irq_set_type() functions.
+ */
+static inline void irqd_set_trigger_type(struct irq_data *d, u32 type)
+{
+ d->state_use_accessors &= ~IRQD_TRIGGER_MASK;
+ d->state_use_accessors |= type & IRQD_TRIGGER_MASK;
+}
+
+static inline bool irqd_is_level_type(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_LEVEL;
+}
+
+static inline bool irqd_is_wakeup_set(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_WAKEUP_STATE;
+}
+
+static inline bool irqd_can_move_in_process_context(struct irq_data *d)
+{
+ return d->state_use_accessors & IRQD_MOVE_PCNTXT;
+}
+
/**
* struct irq_chip - hardware interrupt chip descriptor
*
@@ -150,6 +270,7 @@ struct irq_data {
* @irq_set_wake: enable/disable power-management wake-on of an IRQ
* @irq_bus_lock: function to lock access to slow bus (i2c) chips
* @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips
+ * @flags: chip specific flags
*
* @release: release function solely used by UML
*/
@@ -196,12 +317,27 @@ struct irq_chip {
void (*irq_bus_lock)(struct irq_data *data);
void (*irq_bus_sync_unlock)(struct irq_data *data);
+ unsigned long flags;
+
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
void (*release)(unsigned int irq, void *dev_id);
#endif
};
+/*
+ * irq_chip specific flags
+ *
+ * IRQCHIP_SET_TYPE_MASKED: Mask before calling chip.irq_set_type()
+ * IRQCHIP_EOI_IF_HANDLED: Only issue irq_eoi() when irq was handled
+ * IRQCHIP_MASK_ON_SUSPEND: Mask non wake irqs in the suspend path
+ */
+enum {
+ IRQCHIP_SET_TYPE_MASKED = (1 << 0),
+ IRQCHIP_EOI_IF_HANDLED = (1 << 1),
+ IRQCHIP_MASK_ON_SUSPEND = (1 << 2),
+};
+
/* This include will go away once we isolated irq_desc usage to core code */
#include <linux/irqdesc.h>
@@ -218,7 +354,7 @@ struct irq_chip {
# define ARCH_IRQ_INIT_FLAGS 0
#endif
-#define IRQ_DEFAULT_INIT_FLAGS (IRQ_DISABLED | ARCH_IRQ_INIT_FLAGS)
+#define IRQ_DEFAULT_INIT_FLAGS ARCH_IRQ_INIT_FLAGS
struct irqaction;
extern int setup_irq(unsigned int irq, struct irqaction *new);
@@ -229,9 +365,13 @@ extern void remove_irq(unsigned int irq, struct irqaction *act);
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
void move_native_irq(int irq);
void move_masked_irq(int irq);
+void irq_move_irq(struct irq_data *data);
+void irq_move_masked_irq(struct irq_data *data);
#else
static inline void move_native_irq(int irq) { }
static inline void move_masked_irq(int irq) { }
+static inline void irq_move_irq(struct irq_data *data) { }
+static inline void irq_move_masked_irq(struct irq_data *data) { }
#endif
extern int no_irq_affinity;
@@ -267,23 +407,23 @@ extern struct irq_chip no_irq_chip;
extern struct irq_chip dummy_irq_chip;
extern void
-set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
- irq_flow_handler_t handle);
-extern void
-set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle, const char *name);
+static inline void irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle)
+{
+ irq_set_chip_and_handler_name(irq, chip, handle, NULL);
+}
+
extern void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name);
-/*
- * Set a highlevel flow handler for a given IRQ:
- */
static inline void
-set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
+irq_set_handler(unsigned int irq, irq_flow_handler_t handle)
{
- __set_irq_handler(irq, handle, 0, NULL);
+ __irq_set_handler(irq, handle, 0, NULL);
}
/*
@@ -292,14 +432,11 @@ set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
* IRQ_NOREQUEST and IRQ_NOPROBE)
*/
static inline void
-set_irq_chained_handler(unsigned int irq,
- irq_flow_handler_t handle)
+irq_set_chained_handler(unsigned int irq, irq_flow_handler_t handle)
{
- __set_irq_handler(irq, handle, 1, NULL);
+ __irq_set_handler(irq, handle, 1, NULL);
}
-extern void set_irq_nested_thread(unsigned int irq, int nest);
-
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set);
static inline void irq_set_status_flags(unsigned int irq, unsigned long set)
@@ -312,16 +449,24 @@ static inline void irq_clear_status_flags(unsigned int irq, unsigned long clr)
irq_modify_status(irq, clr, 0);
}
-static inline void set_irq_noprobe(unsigned int irq)
+static inline void irq_set_noprobe(unsigned int irq)
{
irq_modify_status(irq, 0, IRQ_NOPROBE);
}
-static inline void set_irq_probe(unsigned int irq)
+static inline void irq_set_probe(unsigned int irq)
{
irq_modify_status(irq, IRQ_NOPROBE, 0);
}
+static inline void irq_set_nested_thread(unsigned int irq, bool nest)
+{
+ if (nest)
+ irq_set_status_flags(irq, IRQ_NESTED_THREAD);
+ else
+ irq_clear_status_flags(irq, IRQ_NESTED_THREAD);
+}
+
/* Handle dynamic irq creation and destruction */
extern unsigned int create_irq_nr(unsigned int irq_want, int node);
extern int create_irq(void);
@@ -338,14 +483,14 @@ static inline void dynamic_irq_init(unsigned int irq)
}
/* Set/get chip/data for an IRQ: */
-extern int set_irq_chip(unsigned int irq, struct irq_chip *chip);
-extern int set_irq_data(unsigned int irq, void *data);
-extern int set_irq_chip_data(unsigned int irq, void *data);
-extern int set_irq_type(unsigned int irq, unsigned int type);
-extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
+extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
+extern int irq_set_handler_data(unsigned int irq, void *data);
+extern int irq_set_chip_data(unsigned int irq, void *data);
+extern int irq_set_irq_type(unsigned int irq, unsigned int type);
+extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
extern struct irq_data *irq_get_irq_data(unsigned int irq);
-static inline struct irq_chip *get_irq_chip(unsigned int irq)
+static inline struct irq_chip *irq_get_chip(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->chip : NULL;
@@ -356,7 +501,7 @@ static inline struct irq_chip *irq_data_get_irq_chip(struct irq_data *d)
return d->chip;
}
-static inline void *get_irq_chip_data(unsigned int irq)
+static inline void *irq_get_chip_data(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->chip_data : NULL;
@@ -367,18 +512,18 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d)
return d->chip_data;
}
-static inline void *get_irq_data(unsigned int irq)
+static inline void *irq_get_handler_data(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->handler_data : NULL;
}
-static inline void *irq_data_get_irq_data(struct irq_data *d)
+static inline void *irq_data_get_irq_handler_data(struct irq_data *d)
{
return d->handler_data;
}
-static inline struct msi_desc *get_irq_msi(unsigned int irq)
+static inline struct msi_desc *irq_get_msi_desc(unsigned int irq)
{
struct irq_data *d = irq_get_irq_data(irq);
return d ? d->msi_desc : NULL;
@@ -389,6 +534,89 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d)
return d->msi_desc;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+/* Please do not use: Use the replacement functions instead */
+static inline int set_irq_chip(unsigned int irq, struct irq_chip *chip)
+{
+ return irq_set_chip(irq, chip);
+}
+static inline int set_irq_data(unsigned int irq, void *data)
+{
+ return irq_set_handler_data(irq, data);
+}
+static inline int set_irq_chip_data(unsigned int irq, void *data)
+{
+ return irq_set_chip_data(irq, data);
+}
+static inline int set_irq_type(unsigned int irq, unsigned int type)
+{
+ return irq_set_irq_type(irq, type);
+}
+static inline int set_irq_msi(unsigned int irq, struct msi_desc *entry)
+{
+ return irq_set_msi_desc(irq, entry);
+}
+static inline struct irq_chip *get_irq_chip(unsigned int irq)
+{
+ return irq_get_chip(irq);
+}
+static inline void *get_irq_chip_data(unsigned int irq)
+{
+ return irq_get_chip_data(irq);
+}
+static inline void *get_irq_data(unsigned int irq)
+{
+ return irq_get_handler_data(irq);
+}
+static inline void *irq_data_get_irq_data(struct irq_data *d)
+{
+ return irq_data_get_irq_handler_data(d);
+}
+static inline struct msi_desc *get_irq_msi(unsigned int irq)
+{
+ return irq_get_msi_desc(irq);
+}
+static inline void set_irq_noprobe(unsigned int irq)
+{
+ irq_set_noprobe(irq);
+}
+static inline void set_irq_probe(unsigned int irq)
+{
+ irq_set_probe(irq);
+}
+static inline void set_irq_nested_thread(unsigned int irq, int nest)
+{
+ irq_set_nested_thread(irq, nest);
+}
+static inline void
+set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle, const char *name)
+{
+ irq_set_chip_and_handler_name(irq, chip, handle, name);
+}
+static inline void
+set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+ irq_flow_handler_t handle)
+{
+ irq_set_chip_and_handler(irq, chip, handle);
+}
+static inline void
+__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+ const char *name)
+{
+ __irq_set_handler(irq, handle, is_chained, name);
+}
+static inline void set_irq_handler(unsigned int irq, irq_flow_handler_t handle)
+{
+ irq_set_handler(irq, handle);
+}
+static inline void
+set_irq_chained_handler(unsigned int irq, irq_flow_handler_t handle)
+{
+ irq_set_chained_handler(irq, handle);
+}
+#endif
+
int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index c1a95b7b58de..00218371518b 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -8,6 +8,7 @@
* For now it's included from <linux/irq.h>
*/
+struct irq_affinity_notify;
struct proc_dir_entry;
struct timer_rand_state;
/**
@@ -18,13 +19,16 @@ struct timer_rand_state;
* @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()]
* @action: the irq action chain
* @status: status information
+ * @core_internal_state__do_not_mess_with_it: core internal status information
* @depth: disable-depth, for nested irq_disable() calls
* @wake_depth: enable depth, for multiple set_irq_wake() callers
* @irq_count: stats field to detect stalled irqs
* @last_unhandled: aging timer for unhandled count
* @irqs_unhandled: stats field for spurious unhandled interrupts
* @lock: locking for SMP
+ * @affinity_notify: context for notification of affinity changes
* @pending_mask: pending rebalanced interrupts
+ * @threads_oneshot: bitfield to handle shared oneshot threads
* @threads_active: number of irqaction threads currently running
* @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
* @dir: /proc/irq/ procfs entry
@@ -45,6 +49,7 @@ struct irq_desc {
struct {
unsigned int irq;
unsigned int node;
+ unsigned int pad_do_not_even_think_about_it;
struct irq_chip *chip;
void *handler_data;
void *chip_data;
@@ -59,9 +64,16 @@ struct irq_desc {
struct timer_rand_state *timer_rand_state;
unsigned int __percpu *kstat_irqs;
irq_flow_handler_t handle_irq;
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+ irq_preflow_handler_t preflow_handler;
+#endif
struct irqaction *action; /* IRQ action list */
+#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+ unsigned int status_use_accessors;
+#else
unsigned int status; /* IRQ status */
-
+#endif
+ unsigned int core_internal_state__do_not_mess_with_it;
unsigned int depth; /* nested irq disables */
unsigned int wake_depth; /* nested wake enables */
unsigned int irq_count; /* For detecting broken IRQs */
@@ -70,10 +82,12 @@ struct irq_desc {
raw_spinlock_t lock;
#ifdef CONFIG_SMP
const struct cpumask *affinity_hint;
+ struct irq_affinity_notify *affinity_notify;
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_var_t pending_mask;
#endif
#endif
+ unsigned long threads_oneshot;
atomic_t threads_active;
wait_queue_head_t wait_for_threads;
#ifdef CONFIG_PROC_FS
@@ -95,10 +109,51 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
#ifdef CONFIG_GENERIC_HARDIRQS
-#define get_irq_desc_chip(desc) ((desc)->irq_data.chip)
-#define get_irq_desc_chip_data(desc) ((desc)->irq_data.chip_data)
-#define get_irq_desc_data(desc) ((desc)->irq_data.handler_data)
-#define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc)
+static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
+{
+ return &desc->irq_data;
+}
+
+static inline struct irq_chip *irq_desc_get_chip(struct irq_desc *desc)
+{
+ return desc->irq_data.chip;
+}
+
+static inline void *irq_desc_get_chip_data(struct irq_desc *desc)
+{
+ return desc->irq_data.chip_data;
+}
+
+static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
+{
+ return desc->irq_data.handler_data;
+}
+
+static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
+{
+ return desc->irq_data.msi_desc;
+}
+
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+static inline struct irq_chip *get_irq_desc_chip(struct irq_desc *desc)
+{
+ return irq_desc_get_chip(desc);
+}
+static inline void *get_irq_desc_data(struct irq_desc *desc)
+{
+ return irq_desc_get_handler_data(desc);
+}
+
+static inline void *get_irq_desc_chip_data(struct irq_desc *desc)
+{
+ return irq_desc_get_chip_data(desc);
+}
+
+static inline struct msi_desc *get_irq_desc_msi(struct irq_desc *desc)
+{
+ return irq_desc_get_msi_desc(desc);
+}
+#endif
/*
* Architectures call this to let the generic IRQ layer
@@ -123,6 +178,7 @@ static inline int irq_has_action(unsigned int irq)
return desc->action != NULL;
}
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
static inline int irq_balancing_disabled(unsigned int irq)
{
struct irq_desc *desc;
@@ -130,6 +186,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
desc = irq_to_desc(irq);
return desc->status & IRQ_NO_BALANCING_MASK;
}
+#endif
/* caller has locked the irq_desc and both params are valid */
static inline void __set_irq_handler_unlocked(int irq,
@@ -140,6 +197,17 @@ static inline void __set_irq_handler_unlocked(int irq,
desc = irq_to_desc(irq);
desc->handle_irq = handler;
}
+
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+static inline void
+__irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+ desc->preflow_handler = handler;
+}
+#endif
#endif
#endif
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6811f4bfc6e7..922aa313c9f9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+extern u64 nsecs_to_jiffies64(u64 n);
extern unsigned long nsecs_to_jiffies(u64 n);
#define TIMESTAMP_SIZE 30
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 65833d4d5998..9efd081bb31e 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -41,6 +41,9 @@ struct key_type {
*/
size_t def_datalen;
+ /* vet a description */
+ int (*vet_description)(const char *description);
+
/* instantiate a key of this type
* - this method should call key_payload_reserve() to determine if the
* user's quota will hold the payload
@@ -102,11 +105,20 @@ extern int key_instantiate_and_link(struct key *key,
size_t datalen,
struct key *keyring,
struct key *instkey);
-extern int key_negate_and_link(struct key *key,
+extern int key_reject_and_link(struct key *key,
unsigned timeout,
+ unsigned error,
struct key *keyring,
struct key *instkey);
extern void complete_request_key(struct key_construction *cons, int error);
+static inline int key_negate_and_link(struct key *key,
+ unsigned timeout,
+ struct key *keyring,
+ struct key *instkey)
+{
+ return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey);
+}
+
#endif /* CONFIG_KEYS */
#endif /* _LINUX_KEY_TYPE_H */
diff --git a/include/linux/key.h b/include/linux/key.h
index 3db0adce1fda..b2bb01719561 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -170,6 +170,7 @@ struct key {
struct list_head link;
unsigned long x[2];
void *p[2];
+ int reject_error;
} type_data;
/* key data
@@ -275,6 +276,10 @@ static inline key_serial_t key_serial(struct key *key)
return key ? key->serial : 0;
}
+#define rcu_dereference_key(KEY) \
+ (rcu_dereference_protected((KEY)->payload.rcudata, \
+ rwsem_is_locked(&((struct key *)(KEY))->sem)))
+
#ifdef CONFIG_SYSCTL
extern ctl_table key_sysctls[];
#endif
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
index bd383f1944fb..9b0b865ce622 100644
--- a/include/linux/keyctl.h
+++ b/include/linux/keyctl.h
@@ -53,5 +53,7 @@
#define KEYCTL_ASSUME_AUTHORITY 16 /* assume request_key() authorisation */
#define KEYCTL_GET_SECURITY 17 /* get key security label */
#define KEYCTL_SESSION_TO_PARENT 18 /* apply session keyring to parent process */
+#define KEYCTL_REJECT 19 /* reject a partially constructed key */
+#define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */
#endif /* _LINUX_KEYCTL_H */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
};
#define KTHREAD_WORKER_INIT(worker) { \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
.work_list = LIST_HEAD_INIT((worker).work_list), \
}
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c9c5d7ad1a2b..c71f46960f39 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -179,10 +179,6 @@ enum {
ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */
/* (doesn't imply presence) */
ATA_FLAG_SATA = (1 << 1),
- ATA_FLAG_NO_LEGACY = (1 << 2), /* no legacy mode check */
- ATA_FLAG_MMIO = (1 << 3), /* use MMIO, not PIO */
- ATA_FLAG_SRST = (1 << 4), /* (obsolete) use ATA SRST, not E.D.D. */
- ATA_FLAG_SATA_RESET = (1 << 5), /* (obsolete) use COMRESET */
ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */
ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */
ATA_FLAG_PIO_LBA48 = (1 << 8), /* Host DMA engine is LBA28 only */
@@ -198,7 +194,6 @@ enum {
ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */
ATA_FLAG_AN = (1 << 18), /* controller supports AN */
ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */
- ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */
ATA_FLAG_EM = (1 << 21), /* driver supports enclosure
* management */
ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity
@@ -1050,6 +1045,8 @@ extern int ata_scsi_change_queue_depth(struct scsi_device *sdev,
int queue_depth, int reason);
extern struct ata_device *ata_dev_pair(struct ata_device *adev);
extern int ata_do_set_mode(struct ata_link *link, struct ata_device **r_failed_dev);
+extern void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap);
+extern void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, struct list_head *eh_q);
extern int ata_cable_40wire(struct ata_port *ap);
extern int ata_cable_80wire(struct ata_port *ap);
@@ -1613,6 +1610,9 @@ extern void ata_sff_irq_on(struct ata_port *ap);
extern void ata_sff_irq_clear(struct ata_port *ap);
extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
u8 status, int in_wq);
+extern void ata_sff_queue_work(struct work_struct *work);
+extern void ata_sff_queue_delayed_work(struct delayed_work *dwork,
+ unsigned long delay);
extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay);
extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc);
extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc);
diff --git a/include/linux/list.h b/include/linux/list.h
index 9a5f8a71810c..3a54266a1e85 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -96,6 +96,11 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
* in an undefined state.
*/
#ifndef CONFIG_DEBUG_LIST
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
static inline void list_del(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
@@ -103,6 +108,7 @@ static inline void list_del(struct list_head *entry)
entry->prev = LIST_POISON2;
}
#else
+extern void __list_del_entry(struct list_head *entry);
extern void list_del(struct list_head *entry);
#endif
@@ -135,7 +141,7 @@ static inline void list_replace_init(struct list_head *old,
*/
static inline void list_del_init(struct list_head *entry)
{
- __list_del(entry->prev, entry->next);
+ __list_del_entry(entry);
INIT_LIST_HEAD(entry);
}
@@ -146,7 +152,7 @@ static inline void list_del_init(struct list_head *entry)
*/
static inline void list_move(struct list_head *list, struct list_head *head)
{
- __list_del(list->prev, list->next);
+ __list_del_entry(list);
list_add(list, head);
}
@@ -158,7 +164,7 @@ static inline void list_move(struct list_head *list, struct list_head *head)
static inline void list_move_tail(struct list_head *list,
struct list_head *head)
{
- __list_del(list->prev, list->next);
+ __list_del_entry(list);
list_add_tail(list, head);
}
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index 3fd36845ca45..ef4f0b6083a3 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -71,6 +71,7 @@ struct wm8994 {
u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
/* Used over suspend/resume */
+ bool suspended;
u16 ldo_regs[WM8994_NUM_LDO_REGS];
u16 gpio_regs[WM8994_NUM_GPIO_REGS];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6385fc17ad4..679300c050f5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1309,8 +1309,6 @@ int add_from_early_node_map(struct range *range, int az,
int nr_range, int nid);
u64 __init find_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit);
-void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
- u64 goal, u64 limit);
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
extern void sparse_memory_present_with_active_regions(int nid);
diff --git a/include/linux/module.h b/include/linux/module.h
index 9bdf27c7615b..5de42043dff0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -62,7 +62,7 @@ struct module_version_attribute {
struct module_attribute mattr;
const char *module_name;
const char *version;
-};
+} __attribute__ ((__aligned__(sizeof(void *))));
struct module_kobject
{
diff --git a/include/linux/namei.h b/include/linux/namei.h
index f276d4fa01fc..9c8603872c36 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -19,7 +19,6 @@ struct nameidata {
struct path path;
struct qstr last;
struct path root;
- struct file *file;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags;
unsigned seq;
@@ -63,6 +62,10 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
#define LOOKUP_EXCL 0x0400
#define LOOKUP_RENAME_TARGET 0x0800
+#define LOOKUP_JUMPED 0x1000
+#define LOOKUP_ROOT 0x2000
+#define LOOKUP_EMPTY 0x4000
+
extern int user_path_at(int, const char __user *, unsigned, struct path *);
#define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
@@ -72,7 +75,7 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
extern int kern_path(const char *, unsigned, struct path *);
-extern int path_lookup(const char *, unsigned, struct nameidata *);
+extern int kern_path_parent(const char *, struct nameidata *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct nameidata *);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b0340..71caf7a5e6c6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2392,6 +2392,9 @@ extern int netdev_notice(const struct net_device *dev, const char *format, ...)
extern int netdev_info(const struct net_device *dev, const char *format, ...)
__attribute__ ((format (printf, 2, 3)));
+#define MODULE_ALIAS_NETDEV(device) \
+ MODULE_ALIAS("netdev-" device)
+
#if defined(DEBUG)
#define netdev_dbg(__dev, format, args...) \
netdev_printk(KERN_DEBUG, __dev, format, ##args)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b197563913bf..3e112de12d8d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -68,11 +68,7 @@ struct nfs_client {
unsigned char cl_id_uniquifier;
u32 cl_cb_ident; /* v4.0 callback identifier */
const struct nfs4_minor_version_ops *cl_mvops;
-#endif /* CONFIG_NFS_V4 */
-#ifdef CONFIG_NFS_V4_1
- /* clientid returned from EXCHANGE_ID, used by session operations */
- u64 cl_ex_clid;
/* The sequence id to use for the next CREATE_SESSION */
u32 cl_seqid;
/* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -80,7 +76,7 @@ struct nfs_client {
struct nfs4_session *cl_session; /* sharred session */
struct list_head cl_layouts;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache; /* client index cache cookie */
@@ -185,7 +181,7 @@ struct nfs_server {
/* maximum number of slots to use */
#define NFS4_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_NFS_V4)
/* Sessions */
#define SLOT_TABLE_SZ (NFS4_MAX_SLOT_TABLE/(8*sizeof(long)))
@@ -225,5 +221,5 @@ struct nfs4_session {
struct nfs_client *clp;
};
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_NFS_V4 */
#endif
diff --git a/include/linux/of.h b/include/linux/of.h
index cad7cf0ab278..266db1d0baa9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -23,8 +23,6 @@
#include <asm/byteorder.h>
-#ifdef CONFIG_OF
-
typedef u32 phandle;
typedef u32 ihandle;
@@ -65,11 +63,18 @@ struct device_node {
#endif
};
+#ifdef CONFIG_OF
+
/* Pointer for first entry in chain of all nodes. */
extern struct device_node *allnodes;
extern struct device_node *of_chosen;
extern rwlock_t devtree_lock;
+static inline bool of_have_populated_dt(void)
+{
+ return allnodes != NULL;
+}
+
static inline bool of_node_is_root(const struct device_node *node)
{
return node && (node->parent == NULL);
@@ -222,5 +227,12 @@ extern void of_attach_node(struct device_node *);
extern void of_detach_node(struct device_node *);
#endif
+#else
+
+static inline bool of_have_populated_dt(void)
+{
+ return false;
+}
+
#endif /* CONFIG_OF */
#endif /* _LINUX_OF_H */
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
new file mode 100644
index 000000000000..85a27b650d76
--- /dev/null
+++ b/include/linux/of_pci.h
@@ -0,0 +1,9 @@
+#ifndef __OF_PCI_H
+#define __OF_PCI_H
+
+#include <linux/pci.h>
+
+struct pci_dev;
+struct of_irq;
+int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq);
+#endif
diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h
new file mode 100644
index 000000000000..a6ee9aa898bb
--- /dev/null
+++ b/include/linux/pata_arasan_cf_data.h
@@ -0,0 +1,49 @@
+/*
+ * include/linux/pata_arasan_cf_data.h
+ *
+ * Arasan Compact Flash host controller platform data header file
+ *
+ * Copyright (C) 2011 ST Microelectronics
+ * Viresh Kumar <viresh.kumar@st.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _PATA_ARASAN_CF_DATA_H
+#define _PATA_ARASAN_CF_DATA_H
+
+#include <linux/platform_device.h>
+
+struct arasan_cf_pdata {
+ u8 cf_if_clk;
+ #define CF_IF_CLK_100M (0x0)
+ #define CF_IF_CLK_75M (0x1)
+ #define CF_IF_CLK_66M (0x2)
+ #define CF_IF_CLK_50M (0x3)
+ #define CF_IF_CLK_40M (0x4)
+ #define CF_IF_CLK_33M (0x5)
+ #define CF_IF_CLK_25M (0x6)
+ #define CF_IF_CLK_125M (0x7)
+ #define CF_IF_CLK_150M (0x8)
+ #define CF_IF_CLK_166M (0x9)
+ #define CF_IF_CLK_200M (0xA)
+ /*
+ * Platform-specific limitations of the CF controller are handled via
+ * quirks
+ */
+ u32 quirk;
+ #define CF_BROKEN_PIO (1)
+ #define CF_BROKEN_MWDMA (1 << 1)
+ #define CF_BROKEN_UDMA (1 << 2)
+ /* This is platform specific data for the DMA controller */
+ void *dma_priv;
+};
+
+static inline void
+set_arasan_cf_pdata(struct platform_device *pdev, struct arasan_cf_pdata *data)
+{
+ pdev->dev.platform_data = data;
+}
+#endif /* _PATA_ARASAN_CF_DATA_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3adb06ebf841..580de67f318b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 27c3c6fcfad3..3a5c4449fd36 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -255,6 +255,30 @@ extern void __bad_size_call_parameter(void);
pscr2_ret__; \
})
+/*
+ * Dispatch a cmpxchg_double to stem##<size> for two percpu variables of
+ * equal size. pcp1 must be aligned to twice its size and pcp2 must
+ * directly follow it in memory; evaluates to the bool result.
+ */
+#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \
+({ \
+ bool pdcrb_ret__; \
+ __verify_pcpu_ptr(&(pcp1)); \
+ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
+ VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \
+ VM_BUG_ON((unsigned long)(&(pcp2)) != \
+ (unsigned long)(&(pcp1)) + sizeof(pcp1)); \
+ switch(sizeof(pcp1)) { \
+ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
+ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
+ case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
+ case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pdcrb_ret__; \
+})
+
#define __pcpu_size_call(stem, variable, ...) \
do { \
__verify_pcpu_ptr(&(variable)); \
@@ -501,6 +525,45 @@ do { \
#endif
/*
+ * cmpxchg_double replaces two adjacent scalars at once. The first
+ * two parameters are per cpu variables which have to be of the same
+ * size. A truth value is returned to indicate success or failure
+ * (since a double register result is difficult to handle). There is
+ * very limited hardware support for these operations, so only certain
+ * sizes may work.
+ */
+#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int ret__; \
+ preempt_disable(); \
+ ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
+ oval1, oval2, nval1, nval2); \
+ preempt_enable(); \
+ ret__; \
+})
+
+#ifndef this_cpu_cmpxchg_double
+# ifndef this_cpu_cmpxchg_double_1
+# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_2
+# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_4
+# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef this_cpu_cmpxchg_double_8
+# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
+/*
* Generic percpu operations that do not require preemption handling.
* Either we do not care about races or the caller has the
* responsibility of handling preemptions issues. Arch code can still
@@ -703,6 +766,39 @@ do { \
__pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval)
#endif
+#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int __ret = 0; \
+ if (__this_cpu_read(pcp1) == (oval1) && \
+ __this_cpu_read(pcp2) == (oval2)) { \
+ __this_cpu_write(pcp1, (nval1)); \
+ __this_cpu_write(pcp2, (nval2)); \
+ __ret = 1; \
+ } \
+ (__ret); \
+})
+
+#ifndef __this_cpu_cmpxchg_double
+# ifndef __this_cpu_cmpxchg_double_1
+# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_2
+# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_4
+# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef __this_cpu_cmpxchg_double_8
+# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
/*
* IRQ safe versions of the per cpu RMW operations. Note that these operations
* are *not* safe against modification of the same variable from another
@@ -823,4 +919,36 @@ do { \
__pcpu_size_call_return2(irqsafe_cpu_cmpxchg_, (pcp), oval, nval)
#endif
+#define irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ int ret__; \
+ unsigned long flags; \
+ local_irq_save(flags); \
+ ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \
+ oval1, oval2, nval1, nval2); \
+ local_irq_restore(flags); \
+ ret__; \
+})
+
+#ifndef irqsafe_cpu_cmpxchg_double
+# ifndef irqsafe_cpu_cmpxchg_double_1
+# define irqsafe_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_2
+# define irqsafe_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_4
+# define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# ifndef irqsafe_cpu_cmpxchg_double_8
+# define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+# endif
+# define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))
+#endif
+
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dda5b0a3ff60..614615b8d42b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -225,8 +225,14 @@ struct perf_event_attr {
};
__u32 bp_type;
- __u64 bp_addr;
- __u64 bp_len;
+ union {
+ __u64 bp_addr;
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 config2; /* extension of config1 */
+ };
};
/*
@@ -464,6 +470,7 @@ enum perf_callchain_context {
#define PERF_FLAG_FD_NO_GROUP (1U << 0)
#define PERF_FLAG_FD_OUTPUT (1U << 1)
+#define PERF_FLAG_PID_CGROUP (1U << 2) /* pid=cgroup id, per-cpu mode only */
#ifdef __KERNEL__
/*
@@ -471,6 +478,7 @@ enum perf_callchain_context {
*/
#ifdef CONFIG_PERF_EVENTS
+# include <linux/cgroup.h>
# include <asm/perf_event.h>
# include <asm/local64.h>
#endif
@@ -539,6 +547,9 @@ struct hw_perf_event {
unsigned long event_base;
int idx;
int last_cpu;
+ unsigned int extra_reg;
+ u64 extra_config;
+ int extra_alloc;
};
struct { /* software */
struct hrtimer hrtimer;
@@ -716,6 +727,22 @@ struct swevent_hlist {
#define PERF_ATTACH_GROUP 0x02
#define PERF_ATTACH_TASK 0x04
+#ifdef CONFIG_CGROUP_PERF
+/*
+ * perf_cgroup_info keeps track of time_enabled for a cgroup.
+ * This is a per-cpu dynamically allocated data structure.
+ */
+struct perf_cgroup_info {
+ u64 time;
+ u64 timestamp;
+};
+
+struct perf_cgroup {
+ struct cgroup_subsys_state css;
+ struct perf_cgroup_info *info; /* timing info, one per cpu */
+};
+#endif
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -832,6 +859,11 @@ struct perf_event {
struct event_filter *filter;
#endif
+#ifdef CONFIG_CGROUP_PERF
+ struct perf_cgroup *cgrp; /* cgroup event is attached to */
+ int cgrp_defer_enabled;
+#endif
+
#endif /* CONFIG_PERF_EVENTS */
};
@@ -886,6 +918,7 @@ struct perf_event_context {
u64 generation;
int pin_count;
struct rcu_head rcu_head;
+ int nr_cgroups; /* cgroup events present */
};
/*
@@ -905,6 +938,9 @@ struct perf_cpu_context {
struct list_head rotation_list;
int jiffies_interval;
struct pmu *active_pmu;
+#ifdef CONFIG_CGROUP_PERF
+ struct perf_cgroup *cgrp;
+#endif
};
struct perf_output_handle {
@@ -1040,11 +1076,11 @@ have_event:
__perf_sw_event(event_id, nr, nmi, regs, addr);
}
-extern atomic_t perf_task_events;
+extern atomic_t perf_sched_events;
static inline void perf_event_task_sched_in(struct task_struct *task)
{
- COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
+ COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
}
static inline
@@ -1052,7 +1088,7 @@ void perf_event_task_sched_out(struct task_struct *task, struct task_struct *nex
{
perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
- COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
+ COND_STMT(&perf_sched_events, __perf_event_task_sched_out(task, next));
}
extern void perf_event_mmap(struct vm_area_struct *vma);
@@ -1083,6 +1119,10 @@ extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
+extern int perf_proc_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
static inline bool perf_paranoid_tracepoint_raw(void)
{
return sysctl_perf_event_paranoid > -1;
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 7254eda078e5..c9b9f322c8d8 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -31,15 +31,17 @@
*
* Simple ASCII art explanation:
*
- * |HEAD |
- * | |
- * |prio_list.prev|<------------------------------------|
- * |prio_list.next|<->|pl|<->|pl|<--------------->|pl|<-|
- * |10 | |10| |21| |21| |21| |40| (prio)
- * | | | | | | | | | | | |
- * | | | | | | | | | | | |
- * |node_list.next|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
- * |node_list.prev|<------------------------------------|
+ * pl:prio_list (only for plist_node)
+ * nl:node_list
+ * HEAD| NODE(S)
+ * |
+ * ||------------------------------------|
+ * ||->|pl|<->|pl|<--------------->|pl|<-|
+ * | |10| |21| |21| |21| |40| (prio)
+ * | | | | | | | | | | |
+ * | | | | | | | | | | |
+ * |->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<->|nl|<-|
+ * |-------------------------------------------|
*
* The nodes on the prio_list list are sorted by priority to simplify
* the insertion of new nodes. There are no nodes with duplicate
@@ -78,7 +80,6 @@
#include <linux/spinlock_types.h>
struct plist_head {
- struct list_head prio_list;
struct list_head node_list;
#ifdef CONFIG_DEBUG_PI_LIST
raw_spinlock_t *rawlock;
@@ -88,7 +89,8 @@ struct plist_head {
struct plist_node {
int prio;
- struct plist_head plist;
+ struct list_head prio_list;
+ struct list_head node_list;
};
#ifdef CONFIG_DEBUG_PI_LIST
@@ -100,7 +102,6 @@ struct plist_node {
#endif
#define _PLIST_HEAD_INIT(head) \
- .prio_list = LIST_HEAD_INIT((head).prio_list), \
.node_list = LIST_HEAD_INIT((head).node_list)
/**
@@ -133,7 +134,8 @@ struct plist_node {
#define PLIST_NODE_INIT(node, __prio) \
{ \
.prio = (__prio), \
- .plist = { _PLIST_HEAD_INIT((node).plist) }, \
+ .prio_list = LIST_HEAD_INIT((node).prio_list), \
+ .node_list = LIST_HEAD_INIT((node).node_list), \
}
/**
@@ -144,7 +146,6 @@ struct plist_node {
static inline void
plist_head_init(struct plist_head *head, spinlock_t *lock)
{
- INIT_LIST_HEAD(&head->prio_list);
INIT_LIST_HEAD(&head->node_list);
#ifdef CONFIG_DEBUG_PI_LIST
head->spinlock = lock;
@@ -160,7 +161,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
static inline void
plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
{
- INIT_LIST_HEAD(&head->prio_list);
INIT_LIST_HEAD(&head->node_list);
#ifdef CONFIG_DEBUG_PI_LIST
head->rawlock = lock;
@@ -176,7 +176,8 @@ plist_head_init_raw(struct plist_head *head, raw_spinlock_t *lock)
static inline void plist_node_init(struct plist_node *node, int prio)
{
node->prio = prio;
- plist_head_init(&node->plist, NULL);
+ INIT_LIST_HEAD(&node->prio_list);
+ INIT_LIST_HEAD(&node->node_list);
}
extern void plist_add(struct plist_node *node, struct plist_head *head);
@@ -188,7 +189,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* @head: the head for your list
*/
#define plist_for_each(pos, head) \
- list_for_each_entry(pos, &(head)->node_list, plist.node_list)
+ list_for_each_entry(pos, &(head)->node_list, node_list)
/**
* plist_for_each_safe - iterate safely over a plist of given type
@@ -199,7 +200,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* Iterate over a plist of given type, safe against removal of list entry.
*/
#define plist_for_each_safe(pos, n, head) \
- list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list)
+ list_for_each_entry_safe(pos, n, &(head)->node_list, node_list)
/**
* plist_for_each_entry - iterate over list of given type
@@ -208,7 +209,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* @mem: the name of the list_struct within the struct
*/
#define plist_for_each_entry(pos, head, mem) \
- list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list)
+ list_for_each_entry(pos, &(head)->node_list, mem.node_list)
/**
* plist_for_each_entry_safe - iterate safely over list of given type
@@ -220,7 +221,7 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
* Iterate over list of given type, safe against removal of list entry.
*/
#define plist_for_each_entry_safe(pos, n, head, m) \
- list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list)
+ list_for_each_entry_safe(pos, n, &(head)->node_list, m.node_list)
/**
* plist_head_empty - return !0 if a plist_head is empty
@@ -237,7 +238,7 @@ static inline int plist_head_empty(const struct plist_head *head)
*/
static inline int plist_node_empty(const struct plist_node *node)
{
- return plist_head_empty(&node->plist);
+ return list_empty(&node->node_list);
}
/* All functions below assume the plist_head is not empty. */
@@ -285,7 +286,7 @@ static inline int plist_node_empty(const struct plist_node *node)
static inline struct plist_node *plist_first(const struct plist_head *head)
{
return list_entry(head->node_list.next,
- struct plist_node, plist.node_list);
+ struct plist_node, node_list);
}
/**
@@ -297,7 +298,7 @@ static inline struct plist_node *plist_first(const struct plist_head *head)
static inline struct plist_node *plist_last(const struct plist_head *head)
{
return list_entry(head->node_list.prev,
- struct plist_node, plist.node_list);
+ struct plist_node, node_list);
}
#endif
diff --git a/include/linux/pm.h b/include/linux/pm.h
index dd9c7ab38270..6618216bb973 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -267,7 +267,7 @@ const struct dev_pm_ops name = { \
* callbacks provided by device drivers supporting both the system sleep PM and
* runtime PM, make the pm member point to generic_subsys_pm_ops.
*/
-#ifdef CONFIG_PM_OPS
+#ifdef CONFIG_PM
extern struct dev_pm_ops generic_subsys_pm_ops;
#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops)
#else
@@ -431,6 +431,8 @@ struct dev_pm_info {
struct list_head entry;
struct completion completion;
struct wakeup_source *wakeup;
+#else
+ unsigned int should_wakeup:1;
#endif
#ifdef CONFIG_PM_RUNTIME
struct timer_list suspend_timer;
@@ -463,6 +465,14 @@ struct dev_pm_info {
extern void update_pm_runtime_accounting(struct device *dev);
+/*
+ * Power domains provide callbacks that are executed during system suspend,
+ * hibernation, system resume and during runtime PM transitions along with
+ * subsystem-level and driver-level callbacks.
+ */
+struct dev_power_domain {
+ struct dev_pm_ops ops;
+};
/*
* The PM_EVENT_ messages are also used by drivers implementing the legacy
@@ -563,15 +573,6 @@ enum dpm_order {
DPM_ORDER_DEV_LAST,
};
-/*
- * Global Power Management flags
- * Used to keep APM and ACPI from both being active
- */
-extern unsigned int pm_flags;
-
-#define PM_APM 1
-#define PM_ACPI 2
-
extern int pm_generic_suspend(struct device *dev);
extern int pm_generic_resume(struct device *dev);
extern int pm_generic_freeze(struct device *dev);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d34f067e2a7f..8de9aa6e7def 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -87,6 +87,11 @@ static inline bool pm_runtime_enabled(struct device *dev)
return !dev->power.disable_depth;
}
+static inline bool pm_runtime_callbacks_present(struct device *dev)
+{
+ return !dev->power.no_callbacks;
+}
+
static inline void pm_runtime_mark_last_busy(struct device *dev)
{
ACCESS_ONCE(dev->power.last_busy) = jiffies;
@@ -133,6 +138,7 @@ static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
static inline void pm_runtime_no_callbacks(struct device *dev) {}
static inline void pm_runtime_irq_safe(struct device *dev) {}
+static inline bool pm_runtime_callbacks_present(struct device *dev) { return false; }
static inline void pm_runtime_mark_last_busy(struct device *dev) {}
static inline void __pm_runtime_use_autosuspend(struct device *dev,
bool use) {}
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 9cff00dd6b63..a32da962d693 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -62,18 +62,11 @@ struct wakeup_source {
* Changes to device_may_wakeup take effect on the next pm state change.
*/
-static inline void device_set_wakeup_capable(struct device *dev, bool capable)
-{
- dev->power.can_wakeup = capable;
-}
-
static inline bool device_can_wakeup(struct device *dev)
{
return dev->power.can_wakeup;
}
-
-
static inline bool device_may_wakeup(struct device *dev)
{
return dev->power.can_wakeup && !!dev->power.wakeup;
@@ -88,6 +81,7 @@ extern struct wakeup_source *wakeup_source_register(const char *name);
extern void wakeup_source_unregister(struct wakeup_source *ws);
extern int device_wakeup_enable(struct device *dev);
extern int device_wakeup_disable(struct device *dev);
+extern void device_set_wakeup_capable(struct device *dev, bool capable);
extern int device_init_wakeup(struct device *dev, bool val);
extern int device_set_wakeup_enable(struct device *dev, bool enable);
extern void __pm_stay_awake(struct wakeup_source *ws);
@@ -109,11 +103,6 @@ static inline bool device_can_wakeup(struct device *dev)
return dev->power.can_wakeup;
}
-static inline bool device_may_wakeup(struct device *dev)
-{
- return false;
-}
-
static inline struct wakeup_source *wakeup_source_create(const char *name)
{
return NULL;
@@ -134,24 +123,32 @@ static inline void wakeup_source_unregister(struct wakeup_source *ws) {}
static inline int device_wakeup_enable(struct device *dev)
{
- return -EINVAL;
+ dev->power.should_wakeup = true;
+ return 0;
}
static inline int device_wakeup_disable(struct device *dev)
{
+ dev->power.should_wakeup = false;
return 0;
}
-static inline int device_init_wakeup(struct device *dev, bool val)
+static inline int device_set_wakeup_enable(struct device *dev, bool enable)
{
- dev->power.can_wakeup = val;
- return val ? -EINVAL : 0;
+ dev->power.should_wakeup = enable;
+ return 0;
}
+static inline int device_init_wakeup(struct device *dev, bool val)
+{
+ device_set_wakeup_capable(dev, val);
+ device_set_wakeup_enable(dev, val);
+ return 0;
+}
-static inline int device_set_wakeup_enable(struct device *dev, bool enable)
+static inline bool device_may_wakeup(struct device *dev)
{
- return -EINVAL;
+ return dev->power.can_wakeup && dev->power.should_wakeup;
}
static inline void __pm_stay_awake(struct wakeup_source *ws) {}
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
new file mode 100644
index 000000000000..369e19d3750b
--- /dev/null
+++ b/include/linux/posix-clock.h
@@ -0,0 +1,150 @@
+/*
+ * posix-clock.h - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_POSIX_CLOCK_H_
+#define _LINUX_POSIX_CLOCK_H_
+
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/posix-timers.h>
+
+struct posix_clock;
+
+/**
+ * struct posix_clock_operations - functional interface to the clock
+ *
+ * Every posix clock is represented by a character device. Drivers may
+ * optionally offer extended capabilities by implementing the
+ * character device methods. The character device file operations are
+ * first handled by the clock device layer, then passed on to the
+ * driver by calling these functions.
+ *
+ * @owner: The clock driver should set to THIS_MODULE
+ * @clock_adjtime: Adjust the clock
+ * @clock_gettime: Read the current time
+ * @clock_getres: Get the clock resolution
+ * @clock_settime: Set the current time value
+ * @timer_create: Create a new timer
+ * @timer_delete: Remove a previously created timer
+ * @timer_gettime: Get remaining time and interval of a timer
+ * @timer_settime: Set a timer's initial expiration and interval
+ * @fasync: Optional character device fasync method
+ * @mmap: Optional character device mmap method
+ * @open: Optional character device open method
+ * @release: Optional character device release method
+ * @ioctl: Optional character device ioctl method
+ * @read: Optional character device read method
+ * @poll: Optional character device poll method
+ */
+struct posix_clock_operations {
+ struct module *owner;
+
+ int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+
+ int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_settime)(struct posix_clock *pc,
+ const struct timespec *ts);
+
+ int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
+
+ int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
+
+ void (*timer_gettime)(struct posix_clock *pc,
+ struct k_itimer *kit, struct itimerspec *tsp);
+
+ int (*timer_settime)(struct posix_clock *pc,
+ struct k_itimer *kit, int flags,
+ struct itimerspec *tsp, struct itimerspec *old);
+ /*
+ * Optional character device methods:
+ */
+ int (*fasync) (struct posix_clock *pc,
+ int fd, struct file *file, int on);
+
+ long (*ioctl) (struct posix_clock *pc,
+ unsigned int cmd, unsigned long arg);
+
+ int (*mmap) (struct posix_clock *pc,
+ struct vm_area_struct *vma);
+
+ int (*open) (struct posix_clock *pc, fmode_t f_mode);
+
+ uint (*poll) (struct posix_clock *pc,
+ struct file *file, poll_table *wait);
+
+ int (*release) (struct posix_clock *pc);
+
+ ssize_t (*read) (struct posix_clock *pc,
+ uint flags, char __user *buf, size_t cnt);
+};
+
+/**
+ * struct posix_clock - represents a dynamic posix clock
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+ * @kref: Reference count.
+ * @mutex: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+ * @release: A function to free the structure when the reference count reaches
+ * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
+ */
+struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+ struct kref kref;
+ struct mutex mutex;
+ bool zombie;
+ void (*release)(struct posix_clock *clk);
+};
+
+/**
+ * posix_clock_register() - register a new clock
+ * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+ * @devid: Allocated device id
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+ * memory, then the caller must provide a 'release' function to free
+ * that memory.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+int posix_clock_register(struct posix_clock *clk, dev_t devid);
+
+/**
+ * posix_clock_unregister() - unregister a clock
+ * @clk: Clock instance previously registered via posix_clock_register()
+ *
+ * A clock driver calls this function to remove itself from the clock
+ * device subsystem. The posix_clock itself will remain (in an
+ * inactive state) until its reference count drops to zero, at which
+ * point it will be deallocated with its 'release' method.
+ */
+void posix_clock_unregister(struct posix_clock *clk);
+
+#endif
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844a6990..d51243ae0726 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/timex.h>
union cpu_time_count {
cputime_t cpu;
@@ -17,10 +18,21 @@ struct cpu_timer_list {
int firing;
};
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock) \
(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-#define CPUCLOCK_PID_MASK 7
+
#define CPUCLOCK_PERTHREAD_MASK 4
#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
#define CPUCLOCK_CLOCK_MASK 3
@@ -28,12 +40,17 @@ struct cpu_timer_list {
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define CPUCLOCK_MAX 3
+#define CLOCKFD CPUCLOCK_MAX
+#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define MAKE_THREAD_CPUCLOCK(tid, clock) \
MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
+#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
+#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
+
/* POSIX.1b interval timer structure. */
struct k_itimer {
struct list_head list; /* free/ allocate list */
@@ -67,10 +84,11 @@ struct k_itimer {
};
struct k_clock {
- int res; /* in nanoseconds */
int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
- int (*clock_set) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_set) (const clockid_t which_clock,
+ const struct timespec *tp);
int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
struct timespec *, struct timespec __user *);
@@ -84,28 +102,14 @@ struct k_clock {
struct itimerspec * cur_setting);
};
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock);
+extern struct k_clock clock_posix_cpu;
+extern struct k_clock clock_posix_dynamic;
-/* error handlers for timer_create, nanosleep and settime */
-int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
- struct timespec __user *);
-int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
+void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
/* function to call to trigger timer event */
int posix_timer_event(struct k_itimer *timr, int si_private);
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
-int posix_cpu_timer_create(struct k_itimer *timer);
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp);
-long posix_cpu_nsleep_restart(struct restart_block *restart_block);
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old);
-int posix_cpu_timer_del(struct k_itimer *timer);
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
-
void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 092a04f874a8..a1147e5dd245 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -102,11 +102,8 @@
extern long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data);
-extern int ptrace_traceme(void);
extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
-extern int ptrace_attach(struct task_struct *tsk);
-extern int ptrace_detach(struct task_struct *, unsigned int);
extern void ptrace_disable(struct task_struct *);
extern int ptrace_check_attach(struct task_struct *task, int kill);
extern int ptrace_request(struct task_struct *child, long request,
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 3b94c91f20a6..6deef5dc95fb 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -63,6 +63,7 @@ extern const struct xattr_handler reiserfs_xattr_trusted_handler;
extern const struct xattr_handler reiserfs_xattr_security_handler;
#ifdef CONFIG_REISERFS_FS_SECURITY
int reiserfs_security_init(struct inode *dir, struct inode *inode,
+ const struct qstr *qstr,
struct reiserfs_security_handle *sec);
int reiserfs_security_write(struct reiserfs_transaction_handle *th,
struct inode *inode,
@@ -130,6 +131,7 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
#ifndef CONFIG_REISERFS_FS_SECURITY
static inline int reiserfs_security_init(struct inode *dir,
struct inode *inode,
+ const struct qstr *qstr,
struct reiserfs_security_handle *sec)
{
return 0;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 8d3a2486544d..ab38ac80b0f9 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -100,6 +100,8 @@ void ring_buffer_free(struct ring_buffer *buffer);
int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
+void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val);
+
struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer,
unsigned long length);
int ring_buffer_unlock_commit(struct ring_buffer *buffer,
diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h
index d63dcbaea169..9026b30238f3 100644
--- a/include/linux/rio_regs.h
+++ b/include/linux/rio_regs.h
@@ -14,10 +14,12 @@
#define LINUX_RIO_REGS_H
/*
- * In RapidIO, each device has a 2MB configuration space that is
+ * In RapidIO, each device has a 16MB configuration space that is
* accessed via maintenance transactions. Portions of configuration
* space are standardized and/or reserved.
*/
+#define RIO_MAINT_SPACE_SZ 0x1000000 /* 16MB of RapidIO maintenance space */
+
#define RIO_DEV_ID_CAR 0x00 /* [I] Device Identity CAR */
#define RIO_DEV_INFO_CAR 0x04 /* [I] Device Information CAR */
#define RIO_ASM_ID_CAR 0x08 /* [I] Assembly Identity CAR */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index a0b639f8e805..2ca7e8a78060 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,7 +133,6 @@ extern struct class *rtc_class;
* The (current) exceptions are mostly filesystem hooks:
* - the proc() hook for procfs
* - non-ioctl() chardev hooks: open(), release(), read_callback()
- * - periodic irq calls: irq_set_state(), irq_set_freq()
*
* REVISIT those periodic irq calls *do* have ops_lock when they're
* issued through ioctl() ...
@@ -148,11 +147,8 @@ struct rtc_class_ops {
int (*set_alarm)(struct device *, struct rtc_wkalrm *);
int (*proc)(struct device *, struct seq_file *);
int (*set_mmss)(struct device *, unsigned long secs);
- int (*irq_set_state)(struct device *, int enabled);
- int (*irq_set_freq)(struct device *, int freq);
int (*read_callback)(struct device *, int data);
int (*alarm_irq_enable)(struct device *, unsigned int enabled);
- int (*update_irq_enable)(struct device *, unsigned int enabled);
};
#define RTC_DEVICE_NAME_SIZE 20
@@ -203,6 +199,18 @@ struct rtc_device
struct hrtimer pie_timer; /* sub second exp, so needs hrtimer */
int pie_enabled;
struct work_struct irqwork;
+
+
+#ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
+ struct work_struct uie_task;
+ struct timer_list uie_timer;
+ /* Those fields are protected by rtc->irq_lock */
+ unsigned int oldsecs;
+ unsigned int uie_irq_active:1;
+ unsigned int stop_uie_polling:1;
+ unsigned int uie_task_active:1;
+ unsigned int uie_timer_active:1;
+#endif
};
#define to_rtc_device(d) container_of(d, struct rtc_device, dev)
@@ -215,6 +223,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs);
+int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
extern int rtc_set_alarm(struct rtc_device *rtc,
@@ -235,7 +244,10 @@ extern int rtc_irq_set_freq(struct rtc_device *rtc,
struct rtc_task *task, int freq);
extern int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled);
extern int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled);
+extern int rtc_dev_update_irq_enable_emul(struct rtc_device *rtc,
+ unsigned int enabled);
+void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode);
void rtc_aie_update_irq(void *private);
void rtc_uie_update_irq(void *private);
enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
RW_DEP_MAP_INIT(lockname) }
#endif
-/*
- * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
- * deprecated.
- *
- * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
- */
-#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
-
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
#endif
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
#ifdef __KERNEL__
-
-#include <linux/types.h>
-
-struct rwsem_waiter;
-
/*
* the rw-semaphore definition
* - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
#endif
};
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
+#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
#include <asm/system.h>
#include <asm/atomic.h>
@@ -19,9 +22,57 @@ struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
#else
-#include <asm/rwsem.h> /* use an arch-specific implementation */
+/* All arch specific implementations share the same struct */
+struct rw_semaphore {
+ long count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
+/* Include the arch specific part */
+#include <asm/rwsem.h>
+
+/* In all implementations count != 0 means locked */
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return sem->count != 0;
+}
+
+#endif
+
+/* Common initializer macros and functions */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
+ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key);
+
+#define init_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __init_rwsem((sem), #sem, &__key); \
+} while (0)
+
/*
* lock for reading
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d747f948b34e..c15936fe998b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1058,6 +1058,7 @@ struct sched_class {
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
+ bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
@@ -1084,12 +1085,10 @@ struct sched_class {
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_fork) (struct task_struct *p);
- void (*switched_from) (struct rq *this_rq, struct task_struct *task,
- int running);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task,
- int running);
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
- int oldprio, int running);
+ int oldprio);
unsigned int (*get_rr_interval) (struct rq *rq,
struct task_struct *task);
@@ -1715,7 +1714,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
/*
* Per process flags
*/
-#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
@@ -1744,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
-#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
+#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */
/*
@@ -1945,8 +1943,6 @@ int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
-extern unsigned int sysctl_sched_compat_yield;
-
#ifdef CONFIG_SCHED_AUTOGROUP
extern unsigned int sysctl_sched_autogroup_enabled;
@@ -1977,6 +1973,7 @@ static inline int rt_mutex_getprio(struct task_struct *p)
# define rt_mutex_adjust_pi(p) do { } while (0)
#endif
+extern bool yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
extern int task_nice(const struct task_struct *p);
@@ -2049,7 +2046,7 @@ extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
-extern void do_timer(unsigned long ticks);
+extern void xtime_update(unsigned long ticks);
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
@@ -2578,13 +2575,6 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
-/*
- * Call the function if the target task is executing on a CPU right now:
- */
-extern void task_oncpu_function_call(struct task_struct *p,
- void (*func) (void *info), void *info);
-
-
#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/security.h b/include/linux/security.h
index b2b7f9749f5e..83d9227abf02 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/binfmts.h>
+#include <linux/dcache.h>
#include <linux/signal.h>
#include <linux/resource.h>
#include <linux/sem.h>
@@ -53,7 +54,7 @@ struct audit_krule;
*/
extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
-extern int cap_settime(struct timespec *ts, struct timezone *tz);
+extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
extern int cap_ptrace_traceme(struct task_struct *parent);
extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
@@ -267,6 +268,12 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* @orig the original mount data copied from userspace.
* @copy copied data which will be passed to the security module.
* Returns 0 if the copy was successful.
+ * @sb_remount:
+ * Extracts security system specific mount options and verifies no changes
+ * are being made to those options.
+ * @sb superblock being remounted
+ * @data contains the filesystem-specific data.
+ * Return 0 if permission is granted.
* @sb_umount:
* Check permission before the @mnt file system is unmounted.
* @mnt contains the mounted file system.
@@ -315,6 +322,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* then it should return -EOPNOTSUPP to skip this processing.
* @inode contains the inode structure of the newly created inode.
* @dir contains the inode structure of the parent directory.
+ * @qstr contains the last path component of the new object
* @name will be set to the allocated name suffix (e.g. selinux).
* @value will be set to the allocated attribute value.
* @len will be set to the length of the value.
@@ -1257,12 +1265,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
* @cap contains the capability <include/linux/capability.h>.
* @audit: Whether to write an audit message or not
* Return 0 if the capability is granted for @tsk.
- * @sysctl:
- * Check permission before accessing the @table sysctl variable in the
- * manner specified by @op.
- * @table contains the ctl_table structure for the sysctl variable.
- * @op contains the operation (001 = search, 002 = write, 004 = read).
- * Return 0 if permission is granted.
* @syslog:
* Check permission before accessing the kernel message ring or changing
* logging to the console.
@@ -1383,11 +1385,10 @@ struct security_operations {
const kernel_cap_t *permitted);
int (*capable) (struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
- int (*sysctl) (struct ctl_table *table, int op);
int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
int (*quota_on) (struct dentry *dentry);
int (*syslog) (int type);
- int (*settime) (struct timespec *ts, struct timezone *tz);
+ int (*settime) (const struct timespec *ts, const struct timezone *tz);
int (*vm_enough_memory) (struct mm_struct *mm, long pages);
int (*bprm_set_creds) (struct linux_binprm *bprm);
@@ -1399,6 +1400,7 @@ struct security_operations {
int (*sb_alloc_security) (struct super_block *sb);
void (*sb_free_security) (struct super_block *sb);
int (*sb_copy_data) (char *orig, char *copy);
+ int (*sb_remount) (struct super_block *sb, void *data);
int (*sb_kern_mount) (struct super_block *sb, int flags, void *data);
int (*sb_show_options) (struct seq_file *m, struct super_block *sb);
int (*sb_statfs) (struct dentry *dentry);
@@ -1435,7 +1437,8 @@ struct security_operations {
int (*inode_alloc_security) (struct inode *inode);
void (*inode_free_security) (struct inode *inode);
int (*inode_init_security) (struct inode *inode, struct inode *dir,
- char **name, void **value, size_t *len);
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len);
int (*inode_create) (struct inode *dir,
struct dentry *dentry, int mode);
int (*inode_link) (struct dentry *old_dentry,
@@ -1665,11 +1668,10 @@ int security_capset(struct cred *new, const struct cred *old,
int security_capable(const struct cred *cred, int cap);
int security_real_capable(struct task_struct *tsk, int cap);
int security_real_capable_noaudit(struct task_struct *tsk, int cap);
-int security_sysctl(struct ctl_table *table, int op);
int security_quotactl(int cmds, int type, int id, struct super_block *sb);
int security_quota_on(struct dentry *dentry);
int security_syslog(int type);
-int security_settime(struct timespec *ts, struct timezone *tz);
+int security_settime(const struct timespec *ts, const struct timezone *tz);
int security_vm_enough_memory(long pages);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
int security_vm_enough_memory_kern(long pages);
@@ -1681,6 +1683,7 @@ int security_bprm_secureexec(struct linux_binprm *bprm);
int security_sb_alloc(struct super_block *sb);
void security_sb_free(struct super_block *sb);
int security_sb_copy_data(char *orig, char *copy);
+int security_sb_remount(struct super_block *sb, void *data);
int security_sb_kern_mount(struct super_block *sb, int flags, void *data);
int security_sb_show_options(struct seq_file *m, struct super_block *sb);
int security_sb_statfs(struct dentry *dentry);
@@ -1696,7 +1699,8 @@ int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts);
int security_inode_alloc(struct inode *inode);
void security_inode_free(struct inode *inode);
int security_inode_init_security(struct inode *inode, struct inode *dir,
- char **name, void **value, size_t *len);
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len);
int security_inode_create(struct inode *dir, struct dentry *dentry, int mode);
int security_inode_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry);
@@ -1883,11 +1887,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
return ret;
}
-static inline int security_sysctl(struct ctl_table *table, int op)
-{
- return 0;
-}
-
static inline int security_quotactl(int cmds, int type, int id,
struct super_block *sb)
{
@@ -1904,7 +1903,8 @@ static inline int security_syslog(int type)
return 0;
}
-static inline int security_settime(struct timespec *ts, struct timezone *tz)
+static inline int security_settime(const struct timespec *ts,
+ const struct timezone *tz)
{
return cap_settime(ts, tz);
}
@@ -1964,6 +1964,11 @@ static inline int security_sb_copy_data(char *orig, char *copy)
return 0;
}
+static inline int security_sb_remount(struct super_block *sb, void *data)
+{
+ return 0;
+}
+
static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
return 0;
@@ -2023,6 +2028,7 @@ static inline void security_inode_free(struct inode *inode)
static inline int security_inode_init_security(struct inode *inode,
struct inode *dir,
+ const struct qstr *qstr,
char **name,
void **value,
size_t *len)
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
-/*
- * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
- * deprecated.
- * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
- * appropriate.
- */
-#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
-
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
#include <linux/rwlock_types.h>
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 88513fd8e208..d81db8012c63 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -212,6 +212,7 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
const struct rpc_call_ops *ops);
void rpc_put_task(struct rpc_task *);
+void rpc_put_task_async(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 98664db1be47..1f5c18e6f4f1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -62,6 +62,7 @@ struct robust_list_head;
struct getcpu_cache;
struct old_linux_dirent;
struct perf_event_attr;
+struct file_handle;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -132,11 +133,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
.class = &event_class_syscall_enter, \
.event.funcs = &enter_syscall_print_funcs, \
.data = (void *)&__syscall_meta_##sname,\
+ .flags = TRACE_EVENT_FL_CAP_ANY, \
}; \
static struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) \
- *__event_enter_##sname = &event_enter_##sname; \
- __TRACE_EVENT_FLAGS(enter_##sname, TRACE_EVENT_FL_CAP_ANY)
+ *__event_enter_##sname = &event_enter_##sname;
#define SYSCALL_TRACE_EXIT_EVENT(sname) \
static struct syscall_metadata __syscall_meta_##sname; \
@@ -146,11 +147,11 @@ extern struct trace_event_functions exit_syscall_print_funcs;
.class = &event_class_syscall_exit, \
.event.funcs = &exit_syscall_print_funcs, \
.data = (void *)&__syscall_meta_##sname,\
+ .flags = TRACE_EVENT_FL_CAP_ANY, \
}; \
static struct ftrace_event_call __used \
__attribute__((section("_ftrace_events"))) \
- *__event_exit_##sname = &event_exit_##sname; \
- __TRACE_EVENT_FLAGS(exit_##sname, TRACE_EVENT_FL_CAP_ANY)
+ *__event_exit_##sname = &event_exit_##sname;
#define SYSCALL_METADATA(sname, nb) \
SYSCALL_TRACE_ENTER_EVENT(sname); \
@@ -158,6 +159,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
static struct syscall_metadata __used \
__syscall_meta_##sname = { \
.name = "sys"#sname, \
+ .syscall_nr = -1, /* Filled in at boot */ \
.nb_args = nb, \
.types = types_##sname, \
.args = args_##sname, \
@@ -175,6 +177,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
static struct syscall_metadata __used \
__syscall_meta__##sname = { \
.name = "sys_"#sname, \
+ .syscall_nr = -1, /* Filled in at boot */ \
.nb_args = 0, \
.enter_event = &event_enter__##sname, \
.exit_event = &event_exit__##sname, \
@@ -313,6 +316,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
const struct timespec __user *tp);
asmlinkage long sys_clock_gettime(clockid_t which_clock,
struct timespec __user *tp);
+asmlinkage long sys_clock_adjtime(clockid_t which_clock,
+ struct timex __user *tx);
asmlinkage long sys_clock_getres(clockid_t which_clock,
struct timespec __user *tp);
asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
@@ -832,5 +837,10 @@ asmlinkage long sys_mmap_pgoff(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff);
asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
-
+asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
+ struct file_handle __user *handle,
+ int __user *mnt_id, int flag);
+asmlinkage long sys_open_by_handle_at(int mountdirfd,
+ struct file_handle __user *handle,
+ int flags);
#endif
diff --git a/include/linux/syscore_ops.h b/include/linux/syscore_ops.h
new file mode 100644
index 000000000000..27b3b0bc41a9
--- /dev/null
+++ b/include/linux/syscore_ops.h
@@ -0,0 +1,29 @@
+/*
+ * syscore_ops.h - System core operations.
+ *
+ * Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#ifndef _LINUX_SYSCORE_OPS_H
+#define _LINUX_SYSCORE_OPS_H
+
+#include <linux/list.h>
+
+struct syscore_ops {
+ struct list_head node;
+ int (*suspend)(void);
+ void (*resume)(void);
+ void (*shutdown)(void);
+};
+
+extern void register_syscore_ops(struct syscore_ops *ops);
+extern void unregister_syscore_ops(struct syscore_ops *ops);
+#ifdef CONFIG_PM_SLEEP
+extern int syscore_suspend(void);
+extern void syscore_resume(void);
+#endif
+extern void syscore_shutdown(void);
+
+#endif
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7bb5cb64f3b8..11684d9e6bd2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -930,6 +930,7 @@ enum
#ifdef __KERNEL__
#include <linux/list.h>
+#include <linux/rcupdate.h>
/* For the /proc/sys support */
struct ctl_table;
@@ -1037,10 +1038,15 @@ struct ctl_table_root {
struct ctl_table trees. */
struct ctl_table_header
{
- struct ctl_table *ctl_table;
- struct list_head ctl_entry;
- int used;
- int count;
+ union {
+ struct {
+ struct ctl_table *ctl_table;
+ struct list_head ctl_entry;
+ int used;
+ int count;
+ };
+ struct rcu_head rcu;
+ };
struct completion *unregistering;
struct ctl_table *ctl_table_arg;
struct ctl_table_root *root;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8651556dbd52..d3ec89fb4122 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -172,6 +172,14 @@ void thermal_zone_device_update(struct thermal_zone_device *);
struct thermal_cooling_device *thermal_cooling_device_register(char *, void *,
const struct thermal_cooling_device_ops *);
void thermal_cooling_device_unregister(struct thermal_cooling_device *);
+
+#ifdef CONFIG_NET
extern int generate_netlink_event(u32 orig, enum events event);
+#else
+static inline int generate_netlink_event(u32 orig, enum events event)
+{
+ return 0;
+}
+#endif
#endif /* __THERMAL_H__ */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c90696544176..20fc303947d3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -18,9 +18,6 @@ struct compat_timespec;
struct restart_block {
long (*fn)(struct restart_block *);
union {
- struct {
- unsigned long arg0, arg1, arg2, arg3;
- };
/* For futex_wait and futex_wait_requeue_pi */
struct {
u32 __user *uaddr;
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238d..454a26205787 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
#define timespec_valid(ts) \
(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
-extern seqlock_t xtime_lock;
-
extern void read_persistent_clock(struct timespec *ts);
extern void read_boot_clock(struct timespec *ts);
extern int update_persistent_clock(struct timespec now);
@@ -125,8 +123,9 @@ extern int timekeeping_suspended;
unsigned long get_seconds(void);
struct timespec current_kernel_time(void);
struct timespec __current_kernel_time(void); /* does not take xtime_lock */
-struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
struct timespec get_monotonic_coarse(void);
+void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
+ struct timespec *wtom, struct timespec *sleep);
#define CURRENT_TIME (current_kernel_time())
#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,8 +146,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
extern void do_gettimeofday(struct timeval *tv);
-extern int do_settimeofday(struct timespec *tv);
-extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
+extern int do_settimeofday(const struct timespec *tv);
+extern int do_sys_settimeofday(const struct timespec *tv,
+ const struct timezone *tz);
#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
struct itimerval;
@@ -162,12 +162,13 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
struct timespec *ts_real);
extern void getboottime(struct timespec *ts);
extern void monotonic_to_bootbased(struct timespec *ts);
+extern void get_monotonic_boottime(struct timespec *ts);
extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
-extern void update_wall_time(void);
extern void timekeeping_leap_insert(int leapsecond);
+extern int timekeeping_inject_offset(struct timespec *ts);
struct tms;
extern void do_sys_times(struct tms *);
@@ -292,6 +293,7 @@ struct itimerval {
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
/*
* The IDs of various hardware clocks:
diff --git a/include/linux/timex.h b/include/linux/timex.h
index d23999f9499d..aa60fe7b6ed6 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
long tolerance; /* clock frequency tolerance (ppm)
* (read only)
*/
- struct timeval time; /* (read only) */
+ struct timeval time; /* (read only, except for ADJ_SETOFFSET) */
long tick; /* (modified) usecs between clock ticks */
long ppsfreq; /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
#define ADJ_STATUS 0x0010 /* clock status */
#define ADJ_TIMECONST 0x0020 /* pll time constant */
#define ADJ_TAI 0x0080 /* set TAI offset */
+#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
#define ADJ_MICRO 0x1000 /* select microsecond resolution */
#define ADJ_NANO 0x2000 /* select nanosecond resolution */
#define ADJ_TICK 0x4000 /* tick value */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1ac11586a2f5..f584aba78ca9 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -250,7 +250,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
enum {
WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */
WQ_UNBOUND = 1 << 1, /* not bound to any cpu */
- WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */
+ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */
WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */
WQ_HIGHPRI = 1 << 4, /* high priority */
WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */
@@ -286,11 +286,15 @@ enum {
* any specific CPU, not concurrency managed, and all queued works are
* executed immediately as long as max_active limit is not reached and
* resources are available.
+ *
+ * system_freezable_wq is equivalent to system_wq except that it's
+ * freezable.
*/
extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_nrt_wq;
extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *
__alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
@@ -318,7 +322,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active,
/**
* alloc_ordered_workqueue - allocate an ordered workqueue
* @name: name of the workqueue
- * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful)
+ * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
*
* Allocate an ordered workqueue. An ordered workqueue executes at
* most one work item at any given time in the queued order. They are
@@ -335,8 +339,8 @@ alloc_ordered_workqueue(const char *name, unsigned int flags)
#define create_workqueue(name) \
alloc_workqueue((name), WQ_MEM_RECLAIM, 1)
-#define create_freezeable_workqueue(name) \
- alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
+#define create_freezable_workqueue(name) \
+ alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
#define create_singlethread_workqueue(name) \
alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1)
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index e6131ef98d8f..6050783005bd 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -42,11 +42,13 @@
#define XATTR_SMACK_IPOUT "SMACK64IPOUT"
#define XATTR_SMACK_EXEC "SMACK64EXEC"
#define XATTR_SMACK_TRANSMUTE "SMACK64TRANSMUTE"
+#define XATTR_SMACK_MMAP "SMACK64MMAP"
#define XATTR_NAME_SMACK XATTR_SECURITY_PREFIX XATTR_SMACK_SUFFIX
#define XATTR_NAME_SMACKIPIN XATTR_SECURITY_PREFIX XATTR_SMACK_IPIN
#define XATTR_NAME_SMACKIPOUT XATTR_SECURITY_PREFIX XATTR_SMACK_IPOUT
#define XATTR_NAME_SMACKEXEC XATTR_SECURITY_PREFIX XATTR_SMACK_EXEC
#define XATTR_NAME_SMACKTRANSMUTE XATTR_SECURITY_PREFIX XATTR_SMACK_TRANSMUTE
+#define XATTR_NAME_SMACKMMAP XATTR_SECURITY_PREFIX XATTR_SMACK_MMAP
#define XATTR_CAPS_SUFFIX "capability"
#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 071fd7a8d781..6b75a6971346 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -139,6 +139,8 @@ do { \
*/
enum p9_msg_t {
+ P9_TSYNCFS = 0,
+ P9_RSYNCFS,
P9_TLERROR = 6,
P9_RLERROR,
P9_TSTATFS = 8,
@@ -688,7 +690,11 @@ struct p9_rwstat {
* @id: protocol operating identifier of type &p9_msg_t
* @tag: transaction id of the request
* @offset: used by marshalling routines to track currentposition in buffer
- * @capacity: used by marshalling routines to track total capacity
+ * @capacity: used by marshalling routines to track total malloc'd capacity
+ * @pubuf: Payload user buffer given by the caller
+ * @pkbuf: Payload kernel buffer given by the caller
+ * @pbuf_size: size of pubuf/pkbuf (only one will be !NULL) to be read/written.
+ * @private: For transport layer's use.
* @sdata: payload
*
* &p9_fcall represents the structure for all 9P RPC
@@ -705,6 +711,10 @@ struct p9_fcall {
size_t offset;
size_t capacity;
+ char __user *pubuf;
+ char *pkbuf;
+ size_t pbuf_size;
+ void *private;
uint8_t *sdata;
};
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 83ba6a4d58a3..0a30977e3c1f 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -230,6 +230,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
gid_t gid, struct p9_qid *qid);
int p9_client_clunk(struct p9_fid *fid);
int p9_client_fsync(struct p9_fid *fid, int datasync);
+int p9_client_sync_fs(struct p9_fid *fid);
int p9_client_remove(struct p9_fid *fid);
int p9_client_read(struct p9_fid *fid, char *data, char __user *udata,
u64 offset, u32 count);
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 6d5886efb102..82868f18c573 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,11 +26,19 @@
#ifndef NET_9P_TRANSPORT_H
#define NET_9P_TRANSPORT_H
+#define P9_TRANS_PREF_PAYLOAD_MASK 0x1
+
+/* Default. Add Payload to PDU before sending it down to transport layer */
+#define P9_TRANS_PREF_PAYLOAD_DEF 0x0
+/* Send payload separately to transport layer along with PDU. */
+#define P9_TRANS_PREF_PAYLOAD_SEP 0x1
+
/**
* struct p9_trans_module - transport module interface
* @list: used to maintain a list of currently available transports
* @name: the human-readable name of the transport
* @maxsize: transport provided maximum packet size
+ * @pref: Preferences of this transport
* @def: set if this transport should be considered the default
* @create: member function to create a new connection on this transport
* @request: member function to issue a request to the transport
@@ -47,6 +55,7 @@ struct p9_trans_module {
struct list_head list;
char *name; /* name of transport */
int maxsize; /* max message size of transport */
+ int pref; /* Preferences of this transport */
int def; /* this transport should be default */
struct module *owner;
int (*create)(struct p9_client *, const char *, char *);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4a3cd2cd2f5e..96e50e0ce3ca 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -89,6 +89,18 @@
#define IPV6_ADDR_SCOPE_GLOBAL 0x0e
/*
+ * Addr flags
+ */
+#ifdef __KERNEL__
+#define IPV6_ADDR_MC_FLAG_TRANSIENT(a) \
+ ((a)->s6_addr[1] & 0x10)
+#define IPV6_ADDR_MC_FLAG_PREFIX(a) \
+ ((a)->s6_addr[1] & 0x20)
+#define IPV6_ADDR_MC_FLAG_RENDEZVOUS(a) \
+ ((a)->s6_addr[1] & 0x40)
+#endif
+
+/*
* fragmentation header
*/
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index cd85b3bc8327..e505358d8999 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -201,18 +201,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
}
#endif
-static inline void
-nf_tproxy_put_sock(struct sock *sk)
-{
- /* TIME_WAIT inet sockets have to be handled differently */
- if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT))
- inet_twsk_put(inet_twsk(sk));
- else
- sock_put(sk);
-}
-
/* assign a socket to the skb -- consumes sk */
-int
+void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk);
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 160a407c1963..04f8556313d5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -199,7 +199,7 @@ struct tcf_proto {
struct qdisc_skb_cb {
unsigned int pkt_len;
- char data[];
+ long data[];
};
static inline int qdisc_qlen(struct Qdisc *q)
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 8479b66c067b..3fd5064dd43a 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -261,6 +261,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev);
#define CONF_ENABLE_ESR 0x0008
#define CONF_ENABLE_IOCARD 0x0010 /* auto-enabled if IO resources or IRQ
* (CONF_ENABLE_IRQ) in use */
+#define CONF_ENABLE_ZVCARD 0x0020
/* flags used by pcmcia_loop_config() autoconfiguration */
#define CONF_AUTO_CHECK_VCC 0x0100 /* check for matching Vcc? */
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index c583193ae929..9c159f74c6d0 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -39,6 +39,11 @@ int sas_ata_init_host_and_port(struct domain_device *found_dev,
struct scsi_target *starget);
void sas_ata_task_abort(struct sas_task *task);
+void sas_ata_strategy_handler(struct Scsi_Host *shost);
+int sas_ata_timed_out(struct scsi_cmnd *cmd, struct sas_task *task,
+ enum blk_eh_timer_return *rtn);
+int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
+ struct list_head *done_q);
#else
@@ -55,6 +60,23 @@ static inline int sas_ata_init_host_and_port(struct domain_device *found_dev,
static inline void sas_ata_task_abort(struct sas_task *task)
{
}
+
+static inline void sas_ata_strategy_handler(struct Scsi_Host *shost)
+{
+}
+
+static inline int sas_ata_timed_out(struct scsi_cmnd *cmd,
+ struct sas_task *task,
+ enum blk_eh_timer_return *rtn)
+{
+ return 0;
+}
+static inline int sas_ata_eh(struct Scsi_Host *shost, struct list_head *work_q,
+ struct list_head *done_q)
+{
+ return 0;
+}
+
#endif
#endif /* _SAS_ATA_H_ */
diff --git a/include/sound/wm8903.h b/include/sound/wm8903.h
index b4a0db2307ef..1eeebd534f7e 100644
--- a/include/sound/wm8903.h
+++ b/include/sound/wm8903.h
@@ -17,13 +17,9 @@
/*
* R6 (0x06) - Mic Bias Control 0
*/
-#define WM8903_MICDET_HYST_ENA 0x0080 /* MICDET_HYST_ENA */
-#define WM8903_MICDET_HYST_ENA_MASK 0x0080 /* MICDET_HYST_ENA */
-#define WM8903_MICDET_HYST_ENA_SHIFT 7 /* MICDET_HYST_ENA */
-#define WM8903_MICDET_HYST_ENA_WIDTH 1 /* MICDET_HYST_ENA */
-#define WM8903_MICDET_THR_MASK 0x0070 /* MICDET_THR - [6:4] */
-#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [6:4] */
-#define WM8903_MICDET_THR_WIDTH 3 /* MICDET_THR - [6:4] */
+#define WM8903_MICDET_THR_MASK 0x0030 /* MICDET_THR - [5:4] */
+#define WM8903_MICDET_THR_SHIFT 4 /* MICDET_THR - [5:4] */
+#define WM8903_MICDET_THR_WIDTH 2 /* MICDET_THR - [5:4] */
#define WM8903_MICSHORT_THR_MASK 0x000C /* MICSHORT_THR - [3:2] */
#define WM8903_MICSHORT_THR_SHIFT 2 /* MICSHORT_THR - [3:2] */
#define WM8903_MICSHORT_THR_WIDTH 2 /* MICSHORT_THR - [3:2] */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 07fdfb6b9a9a..0828b6c8610a 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -8,7 +8,6 @@
#include <scsi/scsi_cmnd.h>
#include <net/sock.h>
#include <net/tcp.h>
-#include "target_core_mib.h"
#define TARGET_CORE_MOD_VERSION "v4.0.0-rc6"
#define SHUTDOWN_SIGS (sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGABRT))
@@ -195,6 +194,21 @@ typedef enum {
SAM_TASK_ATTR_EMULATED
} t10_task_attr_index_t;
+/*
+ * Used for target SCSI statistics
+ */
+typedef enum {
+ SCSI_INST_INDEX,
+ SCSI_DEVICE_INDEX,
+ SCSI_AUTH_INTR_INDEX,
+ SCSI_INDEX_TYPE_MAX
+} scsi_index_t;
+
+struct scsi_index_table {
+ spinlock_t lock;
+ u32 scsi_mib_index[SCSI_INDEX_TYPE_MAX];
+} ____cacheline_aligned;
+
struct se_cmd;
struct t10_alua {
@@ -578,8 +592,6 @@ struct se_node_acl {
spinlock_t stats_lock;
/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
atomic_t acl_pr_ref_count;
- /* Used for MIB access */
- atomic_t mib_ref_count;
struct se_dev_entry *device_list;
struct se_session *nacl_sess;
struct se_portal_group *se_tpg;
@@ -595,8 +607,6 @@ struct se_node_acl {
} ____cacheline_aligned;
struct se_session {
- /* Used for MIB access */
- atomic_t mib_ref_count;
u64 sess_bin_isid;
struct se_node_acl *se_node_acl;
struct se_portal_group *se_tpg;
@@ -806,7 +816,6 @@ struct se_hba {
/* Virtual iSCSI devices attached. */
u32 dev_count;
u32 hba_index;
- atomic_t dev_mib_access_count;
atomic_t load_balance_queue;
atomic_t left_queue_depth;
/* Maximum queue depth the HBA can handle. */
@@ -845,6 +854,12 @@ struct se_lun {
#define SE_LUN(c) ((struct se_lun *)(c)->se_lun)
+struct scsi_port_stats {
+ u64 cmd_pdus;
+ u64 tx_data_octets;
+ u64 rx_data_octets;
+} ____cacheline_aligned;
+
struct se_port {
/* RELATIVE TARGET PORT IDENTIFER */
u16 sep_rtpi;
@@ -867,6 +882,7 @@ struct se_port {
} ____cacheline_aligned;
struct se_tpg_np {
+ struct se_portal_group *tpg_np_parent;
struct config_group tpg_np_group;
} ____cacheline_aligned;
diff --git a/include/target/target_core_transport.h b/include/target/target_core_transport.h
index 66f44e56eb80..2e8ec51f0615 100644
--- a/include/target/target_core_transport.h
+++ b/include/target/target_core_transport.h
@@ -111,6 +111,8 @@ struct se_subsystem_api;
extern int init_se_global(void);
extern void release_se_global(void);
+extern void init_scsi_index_table(void);
+extern u32 scsi_get_new_index(scsi_index_t);
extern void transport_init_queue_obj(struct se_queue_obj *);
extern int transport_subsystem_check_init(void);
extern int transport_subsystem_register(struct se_subsystem_api *);
@@ -133,6 +135,8 @@ extern void transport_complete_task(struct se_task *, int);
extern void transport_add_task_to_execute_queue(struct se_task *,
struct se_task *,
struct se_device *);
+extern void transport_remove_task_from_execute_queue(struct se_task *,
+ struct se_device *);
unsigned char *transport_dump_cmd_direction(struct se_cmd *);
extern void transport_dump_dev_state(struct se_device *, char *, int *);
extern void transport_dump_dev_info(struct se_device *, struct se_lun *,
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index aba421d68f6f..78f18adb49c8 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
0 : blk_rq_sectors(rq);
__entry->errors = rq->errors;
- blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
blk_dump_cmd(__get_str(cmd), rq);
),
@@ -118,7 +118,7 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->bytes = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
blk_rq_bytes(rq) : 0;
- blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
blk_dump_cmd(__get_str(cmd), rq);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -563,7 +563,7 @@ TRACE_EVENT(block_rq_remap,
__entry->nr_sector = blk_rq_sectors(rq);
__entry->old_dev = dev;
__entry->old_sector = from;
- blk_fill_rwbs_rq(__entry->rwbs, rq);
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
),
TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 7eee77895cb3..4cbbcef6baa8 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -17,36 +17,36 @@ TRACE_EVENT(mce_record,
TP_STRUCT__entry(
__field( u64, mcgcap )
__field( u64, mcgstatus )
- __field( u8, bank )
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
__field( u64, ip )
- __field( u8, cs )
__field( u64, tsc )
__field( u64, walltime )
__field( u32, cpu )
__field( u32, cpuid )
__field( u32, apicid )
__field( u32, socketid )
+ __field( u8, cs )
+ __field( u8, bank )
__field( u8, cpuvendor )
),
TP_fast_assign(
__entry->mcgcap = m->mcgcap;
__entry->mcgstatus = m->mcgstatus;
- __entry->bank = m->bank;
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
__entry->ip = m->ip;
- __entry->cs = m->cs;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
__entry->cpu = m->extcpu;
__entry->cpuid = m->cpuid;
__entry->apicid = m->apicid;
__entry->socketid = m->socketid;
+ __entry->cs = m->cs;
+ __entry->bank = m->bank;
__entry->cpuvendor = m->cpuvendor;
),
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index c6bae36547e5..21a546d27c0c 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -108,14 +108,14 @@ TRACE_EVENT(module_request,
TP_ARGS(name, wait, ip),
TP_STRUCT__entry(
- __field( bool, wait )
__field( unsigned long, ip )
+ __field( bool, wait )
__string( name, name )
),
TP_fast_assign(
- __entry->wait = wait;
__entry->ip = ip;
+ __entry->wait = wait;
__assign_str(name, name);
),
@@ -129,4 +129,3 @@ TRACE_EVENT(module_request,
/* This part must be outside protection */
#include <trace/define_trace.h>
-
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
index f10293c41b1e..0c68ae22da22 100644
--- a/include/trace/events/skb.h
+++ b/include/trace/events/skb.h
@@ -19,14 +19,14 @@ TRACE_EVENT(kfree_skb,
TP_STRUCT__entry(
__field( void *, skbaddr )
- __field( unsigned short, protocol )
__field( void *, location )
+ __field( unsigned short, protocol )
),
TP_fast_assign(
__entry->skbaddr = skb;
- __entry->protocol = ntohs(skb->protocol);
__entry->location = location;
+ __entry->protocol = ntohs(skb->protocol);
),
TP_printk("skbaddr=%p protocol=%u location=%p",
diff --git a/include/xen/events.h b/include/xen/events.h
index 00f53ddcc062..962da2ced5b4 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -75,11 +75,9 @@ int xen_allocate_pirq(unsigned gsi, int shareable, char *name);
int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name);
#ifdef CONFIG_PCI_MSI
-/* Allocate an irq and a pirq to be used with MSIs. */
-#define XEN_ALLOC_PIRQ (1 << 0)
-#define XEN_ALLOC_IRQ (1 << 1)
-void xen_allocate_pirq_msi(char *name, int *irq, int *pirq, int alloc_mask);
-int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type);
+int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc);
+int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ int pirq, int vector, const char *name);
#endif
/* De-allocates the above mentioned physical interrupt. */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index c2d1fa4dc1ee..61e523af3c46 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -51,11 +51,7 @@ typedef uint64_t blkif_sector_t;
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
-struct blkif_request {
- uint8_t operation; /* BLKIF_OP_??? */
- uint8_t nr_segments; /* number of segments */
- blkif_vdev_t handle; /* only for read/write requests */
- uint64_t id; /* private guest value, echoed in resp */
+struct blkif_request_rw {
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
@@ -65,6 +61,16 @@ struct blkif_request {
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
+struct blkif_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ uint8_t nr_segments; /* number of segments */
+ blkif_vdev_t handle; /* only for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ union {
+ struct blkif_request_rw rw;
+ } u;
+};
+
struct blkif_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
@@ -91,4 +97,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4
+/* Xen-defined major numbers for virtual disks, they look strangely
+ * familiar */
+#define XEN_IDE0_MAJOR 3
+#define XEN_IDE1_MAJOR 22
+#define XEN_SCSI_DISK0_MAJOR 8
+#define XEN_SCSI_DISK1_MAJOR 65
+#define XEN_SCSI_DISK2_MAJOR 66
+#define XEN_SCSI_DISK3_MAJOR 67
+#define XEN_SCSI_DISK4_MAJOR 68
+#define XEN_SCSI_DISK5_MAJOR 69
+#define XEN_SCSI_DISK6_MAJOR 70
+#define XEN_SCSI_DISK7_MAJOR 71
+#define XEN_SCSI_DISK8_MAJOR 128
+#define XEN_SCSI_DISK9_MAJOR 129
+#define XEN_SCSI_DISK10_MAJOR 130
+#define XEN_SCSI_DISK11_MAJOR 131
+#define XEN_SCSI_DISK12_MAJOR 132
+#define XEN_SCSI_DISK13_MAJOR 133
+#define XEN_SCSI_DISK14_MAJOR 134
+#define XEN_SCSI_DISK15_MAJOR 135
+
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e2f1bc..b33257bc7e83 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -30,7 +30,7 @@
#define __HYPERVISOR_stack_switch 3
#define __HYPERVISOR_set_callbacks 4
#define __HYPERVISOR_fpu_taskswitch 5
-#define __HYPERVISOR_sched_op 6
+#define __HYPERVISOR_sched_op_compat 6
#define __HYPERVISOR_dom0_op 7
#define __HYPERVISOR_set_debugreg 8
#define __HYPERVISOR_get_debugreg 9
@@ -52,7 +52,7 @@
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
#define __HYPERVISOR_nmi_op 28
-#define __HYPERVISOR_sched_op_new 29
+#define __HYPERVISOR_sched_op 29
#define __HYPERVISOR_callback_op 30
#define __HYPERVISOR_xenoprof_op 31
#define __HYPERVISOR_event_channel_op 32
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 98b92154a264..03c85d7387fb 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -5,9 +5,9 @@
DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
-void xen_pre_suspend(void);
-void xen_post_suspend(int suspend_cancelled);
-void xen_hvm_post_suspend(int suspend_cancelled);
+void xen_arch_pre_suspend(void);
+void xen_arch_post_suspend(int suspend_cancelled);
+void xen_arch_hvm_post_suspend(int suspend_cancelled);
void xen_mm_pin_all(void);
void xen_mm_unpin_all(void);
diff --git a/init/Kconfig b/init/Kconfig
index be788c0957d4..5721d27af626 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -287,6 +287,18 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.gnu.org/software/acct/>.
+config FHANDLE
+ bool "open by fhandle syscalls"
+ select EXPORTFS
+ help
+ If you say Y here, a user level program will be able to map
+ file names to handle and then later use the handle for
+ different file system operations. This is useful in implementing
+ userspace file servers, which now track files using handles instead
+ of names. The handle would remain the same even if file names
+ get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
+ syscalls.
+
config TASKSTATS
bool "Export task/process statistics through netlink (EXPERIMENTAL)"
depends on NET
@@ -683,6 +695,16 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED
select this option (if, for some reason, they need to disable it
then noswapaccount does the trick).
+config CGROUP_PERF
+ bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
+ depends on PERF_EVENTS && CGROUPS
+ help
+ This option extends the per-cpu mode to restrict monitoring to
+ threads which belong to the cgroup specified and run on the
+ designated cpu.
+
+ Say N if unsure.
+
menuconfig CGROUP_SCHED
bool "Group CPU scheduler"
depends on EXPERIMENTAL
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
}
/* Initialize a parent watch entry. */
-static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+static struct audit_parent *audit_init_parent(struct path *path)
{
- struct inode *inode = ndp->path.dentry->d_inode;
+ struct inode *inode = path->dentry->d_inode;
struct audit_parent *parent;
int ret;
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
}
/* Get path information necessary for adding watches. */
-static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
+static int audit_get_nd(struct audit_watch *watch, struct path *parent)
{
- struct nameidata *ndparent, *ndwatch;
+ struct nameidata nd;
+ struct dentry *d;
int err;
- ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
- if (unlikely(!ndparent))
- return -ENOMEM;
+ err = kern_path_parent(watch->path, &nd);
+ if (err)
+ return err;
- ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
- if (unlikely(!ndwatch)) {
- kfree(ndparent);
- return -ENOMEM;
+ if (nd.last_type != LAST_NORM) {
+ path_put(&nd.path);
+ return -EINVAL;
}
- err = path_lookup(path, LOOKUP_PARENT, ndparent);
- if (err) {
- kfree(ndparent);
- kfree(ndwatch);
- return err;
+ mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
+ if (IS_ERR(d)) {
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ path_put(&nd.path);
+ return PTR_ERR(d);
}
-
- err = path_lookup(path, 0, ndwatch);
- if (err) {
- kfree(ndwatch);
- ndwatch = NULL;
+ if (d->d_inode) {
+ /* update watch filter fields */
+ watch->dev = d->d_inode->i_sb->s_dev;
+ watch->ino = d->d_inode->i_ino;
}
+ mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
- *ndp = ndparent;
- *ndw = ndwatch;
-
+ *parent = nd.path;
+ dput(d);
return 0;
}
-/* Release resources used for watch path information. */
-static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
-{
- if (ndp) {
- path_put(&ndp->path);
- kfree(ndp);
- }
- if (ndw) {
- path_put(&ndw->path);
- kfree(ndw);
- }
-}
-
/* Associate the given rule with an existing parent.
* Caller must hold audit_filter_mutex. */
static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
{
struct audit_watch *watch = krule->watch;
struct audit_parent *parent;
- struct nameidata *ndp = NULL, *ndw = NULL;
+ struct path parent_path;
int h, ret = 0;
mutex_unlock(&audit_filter_mutex);
/* Avoid calling path_lookup under audit_filter_mutex. */
- ret = audit_get_nd(watch->path, &ndp, &ndw);
- if (ret) {
- /* caller expects mutex locked */
- mutex_lock(&audit_filter_mutex);
- goto error;
- }
+ ret = audit_get_nd(watch, &parent_path);
+ /* caller expects mutex locked */
mutex_lock(&audit_filter_mutex);
- /* update watch filter fields */
- if (ndw) {
- watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
- watch->ino = ndw->path.dentry->d_inode->i_ino;
- }
+ if (ret)
+ return ret;
/* either find an old parent or attach a new one */
- parent = audit_find_parent(ndp->path.dentry->d_inode);
+ parent = audit_find_parent(parent_path.dentry->d_inode);
if (!parent) {
- parent = audit_init_parent(ndp);
+ parent = audit_init_parent(&parent_path);
if (IS_ERR(parent)) {
ret = PTR_ERR(parent);
goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
h = audit_hash_ino((u32)watch->ino);
*list = &audit_inode_hash[h];
error:
- audit_put_nd(ndp, ndw); /* NULL args OK */
+ path_put(&parent_path);
return ret;
-
}
void audit_remove_watch_rule(struct audit_krule *krule)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b24d7027b83c..95362d15128c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4230,20 +4230,8 @@ void cgroup_post_fork(struct task_struct *child)
*/
void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
- int i;
struct css_set *cg;
-
- if (run_callbacks && need_forkexit_callback) {
- /*
- * modular subsystems can't use callbacks, so no need to lock
- * the subsys array
- */
- for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
- struct cgroup_subsys *ss = subsys[i];
- if (ss->exit)
- ss->exit(ss, tsk);
- }
- }
+ int i;
/*
* Unlink from the css_set task list if necessary.
@@ -4261,7 +4249,24 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
task_lock(tsk);
cg = tsk->cgroups;
tsk->cgroups = &init_css_set;
+
+ if (run_callbacks && need_forkexit_callback) {
+ /*
+ * modular subsystems can't use callbacks, so no need to lock
+ * the subsys array
+ */
+ for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys *ss = subsys[i];
+ if (ss->exit) {
+ struct cgroup *old_cgrp =
+ rcu_dereference_raw(cg->subsys[i])->cgroup;
+ struct cgroup *cgrp = task_cgroup(tsk, i);
+ ss->exit(ss, cgrp, old_cgrp, tsk);
+ }
+ }
+ }
task_unlock(tsk);
+
if (cg)
put_css_set_taskexit(cg);
}
@@ -4813,6 +4818,29 @@ css_get_next(struct cgroup_subsys *ss, int id,
return ret;
}
+/*
+ * get corresponding css from file open on cgroupfs directory
+ */
+struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id)
+{
+ struct cgroup *cgrp;
+ struct inode *inode;
+ struct cgroup_subsys_state *css;
+
+ inode = f->f_dentry->d_inode;
+ /* check in cgroup filesystem dir */
+ if (inode->i_op != &cgroup_dir_inode_operations)
+ return ERR_PTR(-EBADF);
+
+ if (id < 0 || id >= CGROUP_SUBSYS_COUNT)
+ return ERR_PTR(-EINVAL);
+
+ /* get cgroup */
+ cgrp = __d_cgrp(f->f_dentry);
+ css = cgrp->subsys[id];
+ return css ? css : ERR_PTR(-ENOENT);
+}
+
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
struct cgroup *cont)
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0b34a8..38b1d2c1cbe8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
}
+static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
+{
+ memset(txc, 0, sizeof(struct timex));
+
+ if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
+ __get_user(txc->modes, &utp->modes) ||
+ __get_user(txc->offset, &utp->offset) ||
+ __get_user(txc->freq, &utp->freq) ||
+ __get_user(txc->maxerror, &utp->maxerror) ||
+ __get_user(txc->esterror, &utp->esterror) ||
+ __get_user(txc->status, &utp->status) ||
+ __get_user(txc->constant, &utp->constant) ||
+ __get_user(txc->precision, &utp->precision) ||
+ __get_user(txc->tolerance, &utp->tolerance) ||
+ __get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc->tick, &utp->tick) ||
+ __get_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc->jitter, &utp->jitter) ||
+ __get_user(txc->shift, &utp->shift) ||
+ __get_user(txc->stabil, &utp->stabil) ||
+ __get_user(txc->jitcnt, &utp->jitcnt) ||
+ __get_user(txc->calcnt, &utp->calcnt) ||
+ __get_user(txc->errcnt, &utp->errcnt) ||
+ __get_user(txc->stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
+{
+ if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
+ __put_user(txc->modes, &utp->modes) ||
+ __put_user(txc->offset, &utp->offset) ||
+ __put_user(txc->freq, &utp->freq) ||
+ __put_user(txc->maxerror, &utp->maxerror) ||
+ __put_user(txc->esterror, &utp->esterror) ||
+ __put_user(txc->status, &utp->status) ||
+ __put_user(txc->constant, &utp->constant) ||
+ __put_user(txc->precision, &utp->precision) ||
+ __put_user(txc->tolerance, &utp->tolerance) ||
+ __put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc->tick, &utp->tick) ||
+ __put_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc->jitter, &utp->jitter) ||
+ __put_user(txc->shift, &utp->shift) ||
+ __put_user(txc->stabil, &utp->stabil) ||
+ __put_user(txc->jitcnt, &utp->jitcnt) ||
+ __put_user(txc->calcnt, &utp->calcnt) ||
+ __put_user(txc->errcnt, &utp->errcnt) ||
+ __put_user(txc->stbcnt, &utp->stbcnt) ||
+ __put_user(txc->tai, &utp->tai))
+ return -EFAULT;
+ return 0;
+}
+
asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz)
{
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
return err;
}
+long compat_sys_clock_adjtime(clockid_t which_clock,
+ struct compat_timex __user *utp)
+{
+ struct timex txc;
+ mm_segment_t oldfs;
+ int err, ret;
+
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
+ set_fs(oldfs);
+
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
+
+ return ret;
+}
+
long compat_sys_clock_getres(clockid_t which_clock,
struct compat_timespec __user *tp)
{
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
{
struct timex txc;
- int ret;
-
- memset(&txc, 0, sizeof(struct timex));
+ int err, ret;
- if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
- __get_user(txc.modes, &utp->modes) ||
- __get_user(txc.offset, &utp->offset) ||
- __get_user(txc.freq, &utp->freq) ||
- __get_user(txc.maxerror, &utp->maxerror) ||
- __get_user(txc.esterror, &utp->esterror) ||
- __get_user(txc.status, &utp->status) ||
- __get_user(txc.constant, &utp->constant) ||
- __get_user(txc.precision, &utp->precision) ||
- __get_user(txc.tolerance, &utp->tolerance) ||
- __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __get_user(txc.tick, &utp->tick) ||
- __get_user(txc.ppsfreq, &utp->ppsfreq) ||
- __get_user(txc.jitter, &utp->jitter) ||
- __get_user(txc.shift, &utp->shift) ||
- __get_user(txc.stabil, &utp->stabil) ||
- __get_user(txc.jitcnt, &utp->jitcnt) ||
- __get_user(txc.calcnt, &utp->calcnt) ||
- __get_user(txc.errcnt, &utp->errcnt) ||
- __get_user(txc.stbcnt, &utp->stbcnt))
- return -EFAULT;
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
ret = do_adjtimex(&txc);
- if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
- __put_user(txc.modes, &utp->modes) ||
- __put_user(txc.offset, &utp->offset) ||
- __put_user(txc.freq, &utp->freq) ||
- __put_user(txc.maxerror, &utp->maxerror) ||
- __put_user(txc.esterror, &utp->esterror) ||
- __put_user(txc.status, &utp->status) ||
- __put_user(txc.constant, &utp->constant) ||
- __put_user(txc.precision, &utp->precision) ||
- __put_user(txc.tolerance, &utp->tolerance) ||
- __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __put_user(txc.tick, &utp->tick) ||
- __put_user(txc.ppsfreq, &utp->ppsfreq) ||
- __put_user(txc.jitter, &utp->jitter) ||
- __put_user(txc.shift, &utp->shift) ||
- __put_user(txc.stabil, &utp->stabil) ||
- __put_user(txc.jitcnt, &utp->jitcnt) ||
- __put_user(txc.calcnt, &utp->calcnt) ||
- __put_user(txc.errcnt, &utp->errcnt) ||
- __put_user(txc.stbcnt, &utp->stbcnt) ||
- __put_user(txc.tai, &utp->tai))
- ret = -EFAULT;
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
return ret;
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..e92e98189032 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
return -ENODEV;
trialcs = alloc_trial_cpuset(cs);
- if (!trialcs)
- return -ENOMEM;
+ if (!trialcs) {
+ retval = -ENOMEM;
+ goto out;
+ }
switch (cft->private) {
case FILE_CPULIST:
@@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
}
free_trial_cpuset(trialcs);
+out:
cgroup_unlock();
return retval;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6c..2343c132c5a7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
};
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..bda415715382 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
return NULL;
}
-static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+ u32 uval, u32 newval)
{
- u32 curval;
+ int ret;
pagefault_disable();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
pagefault_enable();
- return curval;
+ return ret;
}
static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
struct task_struct *task, int set_waiters)
{
int lock_taken, ret, ownerdied = 0;
- u32 uval, newval, curval;
+ u32 uval, newval, curval, vpid = task_pid_vnr(task);
retry:
ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
* (by doing a 0 -> TID atomic cmpxchg), while holding all
* the locks. It will most likely not succeed.
*/
- newval = task_pid_vnr(task);
+ newval = vpid;
if (set_waiters)
newval |= FUTEX_WAITERS;
- curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
- if (unlikely(curval == -EFAULT))
+ if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
return -EFAULT;
/*
* Detect deadlocks.
*/
- if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+ if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
return -EDEADLK;
/*
@@ -723,14 +722,12 @@ retry:
*/
if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
/* Keep the OWNER_DIED bit */
- newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+ newval = (curval & ~FUTEX_TID_MASK) | vpid;
ownerdied = 0;
lock_taken = 1;
}
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (unlikely(curval == -EFAULT))
+ if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT;
if (unlikely(curval != uval))
goto retry;
@@ -775,6 +772,24 @@ retry:
return ret;
}
+/**
+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+ * @q: The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be NULL and must be held by the caller.
+ */
+static void __unqueue_futex(struct futex_q *q)
+{
+ struct futex_hash_bucket *hb;
+
+ if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
+ || plist_node_empty(&q->list)))
+ return;
+
+ hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+ plist_del(&q->list, &hb->chain);
+}
+
/*
* The hash bucket lock must be held when this is called.
* Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
*/
get_task_struct(p);
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
/*
* The waiting task can free the futex_q as soon as
* q->lock_ptr = NULL is written, without taking any locks. A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
ret = -EFAULT;
else if (curval != uval)
ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
* There is no waiter, so we unlock the futex. The owner died
* bit has not to be preserved here. We are the owner:
*/
- oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
-
- if (oldval == -EFAULT)
- return oldval;
+ if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
+ return -EFAULT;
if (oldval != uval)
return -EAGAIN;
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
plist_del(&q->list, &hb1->chain);
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb2->lock;
-#endif
}
get_futex_key_refs(key2);
q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
get_futex_key_refs(key);
q->key = *key;
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
q->lock_ptr = &hb->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb->lock;
-#endif
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
prio = min(current->normal_prio, MAX_RT_PRIO);
plist_node_init(&q->list, prio);
-#ifdef CONFIG_DEBUG_PI_LIST
- q->list.plist.spinlock = &hb->lock;
-#endif
plist_add(&q->list, &hb->chain);
q->task = current;
spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
spin_unlock(lock_ptr);
goto retry;
}
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
BUG_ON(q->pi_state);
@@ -1525,8 +1525,7 @@ retry:
static void unqueue_me_pi(struct futex_q *q)
__releases(q->lock_ptr)
{
- WARN_ON(plist_node_empty(&q->list));
- plist_del(&q->list, &q->list.plist);
+ __unqueue_futex(q);
BUG_ON(!q->pi_state);
free_pi_state(q->pi_state);
@@ -1556,10 +1555,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
/*
* We are here either because we stole the rtmutex from the
- * pending owner or we are the pending owner which failed to
- * get the rtmutex. We have to replace the pending owner TID
- * in the user space variable. This must be atomic as we have
- * to preserve the owner died bit here.
+ * previous highest priority waiter or we are the highest priority
+ * waiter but failed to get the rtmutex the first time.
+ * We have to replace the newowner TID in the user space variable.
+ * This must be atomic as we have to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the pi_state
* because we can fault here. Imagine swapped out pages or a fork
@@ -1578,9 +1577,7 @@ retry:
while (1) {
newval = (uval & FUTEX_OWNER_DIED) | newtid;
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
goto handle_fault;
if (curval == uval)
break;
@@ -1608,8 +1605,8 @@ retry:
/*
* To handle the page fault we need to drop the hash bucket
- * lock here. That gives the other task (either the pending
- * owner itself or the task which stole the rtmutex) the
+ * lock here. That gives the other task (either the highest priority
+ * waiter itself or the task which stole the rtmutex) the
* chance to try the fixup of the pi_state. So once we are
* back from handling the fault we need to check the pi_state
* after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1682,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
/*
* pi_state is incorrect, some other task did a lock steal and
* we returned due to timeout or signal without taking the
- * rt_mutex. Too late. We can access the rt_mutex_owner without
- * locking, as the other task is now blocked on the hash bucket
- * lock. Fix the state up.
+ * rt_mutex. Too late.
*/
+ raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+ if (!owner)
+ owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
+ raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
ret = fixup_pi_state_owner(uaddr, q, owner);
goto out;
}
/*
* Paranoia check. If we did not take the lock, then we should not be
- * the owner, nor the pending owner, of the rt_mutex.
+ * the owner of the rt_mutex.
*/
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
@@ -1781,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
*
* The basic logical guarantee of a futex is that it blocks ONLY
* if cond(var) is known to be true at the time of blocking, for
- * any cond. If we queued after testing *uaddr, that would open
- * a race condition where we could block indefinitely with
+ * any cond. If we locked the hash-bucket after testing *uaddr, that
+ * would open a race condition where we could block indefinitely with
* cond(var) false, which would violate the guarantee.
*
- * A consequence is that futex_wait() can return zero and absorb
- * a wakeup when *uaddr != val on entry to the syscall. This is
- * rare, but normal.
+ * On the other hand, we insert q and release the hash-bucket only
+ * after testing *uaddr. This guarantees that futex_wait() will NOT
+ * absorb a wakeup if *uaddr does not match the desired values
+ * while the syscall executes.
*/
retry:
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2046,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- u32 uval;
struct plist_head *head;
union futex_key key = FUTEX_KEY_INIT;
+ u32 uval, vpid = task_pid_vnr(current);
int ret;
retry:
@@ -2057,7 +2057,7 @@ retry:
/*
* We release only a lock we actually own:
*/
- if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
+ if ((uval & FUTEX_TID_MASK) != vpid)
return -EPERM;
ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2072,17 +2072,14 @@ retry:
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- if (!(uval & FUTEX_OWNER_DIED))
- uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
-
-
- if (unlikely(uval == -EFAULT))
+ if (!(uval & FUTEX_OWNER_DIED) &&
+ cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
goto pi_faulted;
/*
* Rare case: we managed to release the lock atomically,
* no need to wake anyone else up:
*/
- if (unlikely(uval == task_pid_vnr(current)))
+ if (unlikely(uval == vpid))
goto out_unlock;
/*
@@ -2167,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* We were woken prior to requeue by a timeout or a signal.
* Unqueue the futex_q and determine which it was.
*/
- plist_del(&q->list, &q->list.plist);
+ plist_del(&q->list, &hb->chain);
/* Handle spurious wakeups gracefully */
ret = -EWOULDBLOCK;
@@ -2463,11 +2460,20 @@ retry:
* userspace.
*/
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
- nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
-
- if (nval == -EFAULT)
- return -1;
-
+ /*
+ * We are not holding a lock here, but we want to have
+ * the pagefault_disable/enable() protection because
+ * we want to handle the fault gracefully. If the
+ * access fails we try to fault in the futex with R/W
+ * verification via get_user_pages. get_user() above
+ * does not guarantee R/W access. If that fails we
+ * give up and leave the futex locked.
+ */
+ if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+ if (fault_in_user_writeable(uaddr))
+ return -1;
+ goto retry;
+ }
if (nval != uval)
goto retry;
@@ -2678,8 +2684,7 @@ static int __init futex_init(void)
* implementation, the non-functional ones will return
* -ENOSYS.
*/
- curval = cmpxchg_futex_value_locked(NULL, 0, 0);
- if (curval == -EFAULT)
+ if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
futex_cmpxchg_enabled = 1;
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..9017478c5d4c 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -53,11 +53,10 @@
/*
* The timer bases:
*
- * Note: If we want to add new timer bases, we have to skip the two
- * clock ids captured by the cpu-timers. We do this by holding empty
- * entries rather than doing math adjustment of the clock ids.
- * This ensures that we capture erroneous accesses to these clock ids
- * rather than moving them into the range of valid clock id's.
+ * There are more clockids than hrtimer bases. Thus, we index
+ * into the timer bases by the hrtimer_base_type enum. When trying
+ * to reach a base using a clockid, hrtimer_clockid_to_base()
+ * is used to convert from clockid to the proper hrtimer_base_type.
*/
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
@@ -74,30 +73,39 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.get_time = &ktime_get,
.resolution = KTIME_LOW_RES,
},
+ {
+ .index = CLOCK_BOOTTIME,
+ .get_time = &ktime_get_boottime,
+ .resolution = KTIME_LOW_RES,
+ },
}
};
+static int hrtimer_clock_to_base_table[MAX_CLOCKS];
+
+static inline int hrtimer_clockid_to_base(clockid_t clock_id)
+{
+ return hrtimer_clock_to_base_table[clock_id];
+}
+
+
/*
* Get the coarse grained time at the softirq based on xtime and
* wall_to_monotonic.
*/
static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
{
- ktime_t xtim, tomono;
- struct timespec xts, tom;
- unsigned long seq;
+ ktime_t xtim, mono, boot;
+ struct timespec xts, tom, slp;
- do {
- seq = read_seqbegin(&xtime_lock);
- xts = __current_kernel_time();
- tom = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_and_sleep_offset(&xts, &tom, &slp);
xtim = timespec_to_ktime(xts);
- tomono = timespec_to_ktime(tom);
- base->clock_base[CLOCK_REALTIME].softirq_time = xtim;
- base->clock_base[CLOCK_MONOTONIC].softirq_time =
- ktime_add(xtim, tomono);
+ mono = ktime_add(xtim, timespec_to_ktime(tom));
+ boot = ktime_add(mono, timespec_to_ktime(slp));
+ base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
+ base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
+ base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
}
/*
@@ -184,10 +192,11 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
struct hrtimer_cpu_base *new_cpu_base;
int this_cpu = smp_processor_id();
int cpu = hrtimer_get_target(this_cpu, pinned);
+ int basenum = hrtimer_clockid_to_base(base->index);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
- new_base = &new_cpu_base->clock_base[base->index];
+ new_base = &new_cpu_base->clock_base[basenum];
if (base != new_base) {
/*
@@ -334,6 +343,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
static struct debug_obj_descr hrtimer_debug_descr;
+static void *hrtimer_debug_hint(void *addr)
+{
+ return ((struct hrtimer *) addr)->function;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -393,6 +407,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr hrtimer_debug_descr = {
.name = "hrtimer",
+ .debug_hint = hrtimer_debug_hint,
.fixup_init = hrtimer_fixup_init,
.fixup_activate = hrtimer_fixup_activate,
.fixup_free = hrtimer_fixup_free,
@@ -611,24 +626,23 @@ static int hrtimer_reprogram(struct hrtimer *timer,
static void retrigger_next_event(void *arg)
{
struct hrtimer_cpu_base *base;
- struct timespec realtime_offset, wtm;
- unsigned long seq;
+ struct timespec realtime_offset, wtm, sleep;
if (!hrtimer_hres_active())
return;
- do {
- seq = read_seqbegin(&xtime_lock);
- wtm = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm,
+ &sleep);
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
base = &__get_cpu_var(hrtimer_bases);
/* Adjust CLOCK_REALTIME offset */
raw_spin_lock(&base->lock);
- base->clock_base[CLOCK_REALTIME].offset =
+ base->clock_base[HRTIMER_BASE_REALTIME].offset =
timespec_to_ktime(realtime_offset);
+ base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
+ timespec_to_ktime(sleep);
hrtimer_force_reprogram(base, 0);
raw_spin_unlock(&base->lock);
@@ -673,14 +687,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
}
/*
- * Initialize the high resolution related parts of a hrtimer
- */
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
-{
-}
-
-
-/*
* When High resolution timers are active, try to reprogram. Note, that in case
* the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry
* check happens. The timer gets enqueued into the rbtree. The reprogramming
@@ -725,8 +731,9 @@ static int hrtimer_switch_to_hres(void)
return 0;
}
base->hres_active = 1;
- base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
- base->clock_base[CLOCK_MONOTONIC].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES;
+ base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES;
tick_setup_sched_timer();
@@ -750,7 +757,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
return 0;
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
-static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -1121,6 +1127,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
struct hrtimer_cpu_base *cpu_base;
+ int base;
memset(timer, 0, sizeof(struct hrtimer));
@@ -1129,8 +1136,8 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
clock_id = CLOCK_MONOTONIC;
- timer->base = &cpu_base->clock_base[clock_id];
- hrtimer_init_timer_hres(timer);
+ base = hrtimer_clockid_to_base(clock_id);
+ timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
#ifdef CONFIG_TIMER_STATS
@@ -1165,9 +1172,10 @@ EXPORT_SYMBOL_GPL(hrtimer_init);
int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
{
struct hrtimer_cpu_base *cpu_base;
+ int base = hrtimer_clockid_to_base(which_clock);
cpu_base = &__raw_get_cpu_var(hrtimer_bases);
- *tp = ktime_to_timespec(cpu_base->clock_base[which_clock].resolution);
+ *tp = ktime_to_timespec(cpu_base->clock_base[base].resolution);
return 0;
}
@@ -1714,6 +1722,10 @@ static struct notifier_block __cpuinitdata hrtimers_nb = {
void __init hrtimers_init(void)
{
+ hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME;
+ hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC;
+ hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME;
+
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 8e42fec7686d..09bef82d74cb 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,5 +1,6 @@
+# Select this to activate the generic irq options below
config HAVE_GENERIC_HARDIRQS
- def_bool n
+ bool
if HAVE_GENERIC_HARDIRQS
menu "IRQ subsystem"
@@ -11,26 +12,44 @@ config GENERIC_HARDIRQS
# Select this to disable the deprecated stuff
config GENERIC_HARDIRQS_NO_DEPRECATED
- def_bool n
+ bool
+
+config GENERIC_HARDIRQS_NO_COMPAT
+ bool
# Options selectable by the architecture code
+
+# Make sparse irq Kconfig switch below available
config HAVE_SPARSE_IRQ
- def_bool n
+ bool
+# Enable the generic irq autoprobe mechanism
config GENERIC_IRQ_PROBE
- def_bool n
+ bool
+
+# Use the generic /proc/interrupts implementation
+config GENERIC_IRQ_SHOW
+ bool
+# Support for delayed migration from interrupt context
config GENERIC_PENDING_IRQ
- def_bool n
+ bool
+# Alpha specific irq affinity mechanism
config AUTO_IRQ_AFFINITY
- def_bool n
-
-config IRQ_PER_CPU
- def_bool n
+ bool
+# Tasklet based software resend for pending interrupts on enable_irq()
config HARDIRQS_SW_RESEND
- def_bool n
+ bool
+
+# Preflow handler support for fasteoi (sparc64)
+config IRQ_PREFLOW_FASTEOI
+ bool
+
+# Support forced irq threading
+config IRQ_FORCED_THREADING
+ bool
config SPARSE_IRQ
bool "Support sparse irq numbering"
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 505798f86c36..394784c57060 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -17,7 +17,7 @@
/*
* Autodetection depends on the fact that any interrupt that
* comes in on to an unassigned handler will get stuck with
- * "IRQ_WAITING" cleared and the interrupt disabled.
+ * "IRQS_WAITING" cleared and the interrupt disabled.
*/
static DEFINE_MUTEX(probing_active);
@@ -32,7 +32,6 @@ unsigned long probe_irq_on(void)
{
struct irq_desc *desc;
unsigned long mask = 0;
- unsigned int status;
int i;
/*
@@ -46,13 +45,7 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc_reverse(i, desc) {
raw_spin_lock_irq(&desc->lock);
- if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
- /*
- * An old-style architecture might still have
- * the handle_bad_irq handler there:
- */
- compat_irq_chip_set_default_handler(desc);
-
+ if (!desc->action && irq_settings_can_probe(desc)) {
/*
* Some chips need to know about probing in
* progress:
@@ -60,7 +53,7 @@ unsigned long probe_irq_on(void)
if (desc->irq_data.chip->irq_set_type)
desc->irq_data.chip->irq_set_type(&desc->irq_data,
IRQ_TYPE_PROBE);
- desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_startup(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -75,10 +68,12 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc_reverse(i, desc) {
raw_spin_lock_irq(&desc->lock);
- if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
- desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
- if (desc->irq_data.chip->irq_startup(&desc->irq_data))
- desc->status |= IRQ_PENDING;
+ if (!desc->action && irq_settings_can_probe(desc)) {
+ desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
+ if (irq_startup(desc)) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ }
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -93,13 +88,12 @@ unsigned long probe_irq_on(void)
*/
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
- if (status & IRQ_AUTODETECT) {
+ if (desc->istate & IRQS_AUTODETECT) {
/* It triggered already - consider it spurious. */
- if (!(status & IRQ_WAITING)) {
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ if (!(desc->istate & IRQS_WAITING)) {
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -125,20 +119,18 @@ EXPORT_SYMBOL(probe_irq_on);
*/
unsigned int probe_irq_mask(unsigned long val)
{
- unsigned int status, mask = 0;
+ unsigned int mask = 0;
struct irq_desc *desc;
int i;
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
-
- if (status & IRQ_AUTODETECT) {
- if (i < 16 && !(status & IRQ_WAITING))
+ if (desc->istate & IRQS_AUTODETECT) {
+ if (i < 16 && !(desc->istate & IRQS_WAITING))
mask |= 1 << i;
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,20 +161,18 @@ int probe_irq_off(unsigned long val)
{
int i, irq_found = 0, nr_of_irqs = 0;
struct irq_desc *desc;
- unsigned int status;
for_each_irq_desc(i, desc) {
raw_spin_lock_irq(&desc->lock);
- status = desc->status;
- if (status & IRQ_AUTODETECT) {
- if (!(status & IRQ_WAITING)) {
+ if (desc->istate & IRQS_AUTODETECT) {
+ if (!(desc->istate & IRQS_WAITING)) {
if (!nr_of_irqs)
irq_found = i;
nr_of_irqs++;
}
- desc->status = status & ~IRQ_AUTODETECT;
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ desc->istate &= ~IRQS_AUTODETECT;
+ irq_shutdown(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index baa5c4acad83..c9c0601f0615 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -19,140 +19,110 @@
#include "internals.h"
/**
- * set_irq_chip - set the irq chip for an irq
+ * irq_set_chip - set the irq chip for an irq
* @irq: irq number
* @chip: pointer to irq chip description structure
*/
-int set_irq_chip(unsigned int irq, struct irq_chip *chip)
+int irq_set_chip(unsigned int irq, struct irq_chip *chip)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- WARN(1, KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
if (!chip)
chip = &no_irq_chip;
- raw_spin_lock_irqsave(&desc->lock, flags);
irq_chip_set_defaults(chip);
desc->irq_data.chip = chip;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_chip);
+EXPORT_SYMBOL(irq_set_chip);
/**
- * set_irq_type - set the irq trigger type for an irq
+ * irq_set_irq_type - set the irq trigger type for an irq
* @irq: irq number
* @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h
*/
-int set_irq_type(unsigned int irq, unsigned int type)
+int irq_set_irq_type(unsigned int irq, unsigned int type)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
- int ret = -ENXIO;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
+ int ret = 0;
- if (!desc) {
- printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
- return -ENODEV;
- }
+ if (!desc)
+ return -EINVAL;
type &= IRQ_TYPE_SENSE_MASK;
- if (type == IRQ_TYPE_NONE)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- ret = __irq_set_trigger(desc, irq, type);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (type != IRQ_TYPE_NONE)
+ ret = __irq_set_trigger(desc, irq, type);
+ irq_put_desc_busunlock(desc, flags);
return ret;
}
-EXPORT_SYMBOL(set_irq_type);
+EXPORT_SYMBOL(irq_set_irq_type);
/**
- * set_irq_data - set irq type data for an irq
+ * irq_set_handler_data - set irq handler data for an irq
* @irq: Interrupt number
* @data: Pointer to interrupt specific data
*
* Set the hardware irq controller data for an irq
*/
-int set_irq_data(unsigned int irq, void *data)
+int irq_set_handler_data(unsigned int irq, void *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install controller data for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.handler_data = data;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_data);
+EXPORT_SYMBOL(irq_set_handler_data);
/**
- * set_irq_msi - set MSI descriptor data for an irq
+ * irq_set_msi_desc - set MSI descriptor data for an irq
* @irq: Interrupt number
* @entry: Pointer to MSI descriptor data
*
* Set the MSI descriptor entry for an irq
*/
-int set_irq_msi(unsigned int irq, struct msi_desc *entry)
+int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install msi data for IRQ%d\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.msi_desc = entry;
if (entry)
entry->irq = irq;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
return 0;
}
/**
- * set_irq_chip_data - set irq chip data for an irq
+ * irq_set_chip_data - set irq chip data for an irq
* @irq: Interrupt number
* @data: Pointer to chip specific data
*
* Set the hardware irq chip data for an irq
*/
-int set_irq_chip_data(unsigned int irq, void *data)
+int irq_set_chip_data(unsigned int irq, void *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install chip data for IRQ%d\n", irq);
- return -EINVAL;
- }
-
- if (!desc->irq_data.chip) {
- printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
+ if (!desc)
return -EINVAL;
- }
-
- raw_spin_lock_irqsave(&desc->lock, flags);
desc->irq_data.chip_data = data;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
+ irq_put_desc_unlock(desc, flags);
return 0;
}
-EXPORT_SYMBOL(set_irq_chip_data);
+EXPORT_SYMBOL(irq_set_chip_data);
struct irq_data *irq_get_irq_data(unsigned int irq)
{
@@ -162,72 +132,75 @@ struct irq_data *irq_get_irq_data(unsigned int irq)
}
EXPORT_SYMBOL_GPL(irq_get_irq_data);
-/**
- * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq
- *
- * @irq: Interrupt number
- * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag
- *
- * The IRQ_NESTED_THREAD flag indicates that on
- * request_threaded_irq() no separate interrupt thread should be
- * created for the irq as the handler are called nested in the
- * context of a demultiplexing interrupt handler thread.
- */
-void set_irq_nested_thread(unsigned int irq, int nest)
+static void irq_state_clr_disabled(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
-
- if (!desc)
- return;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- if (nest)
- desc->status |= IRQ_NESTED_THREAD;
- else
- desc->status &= ~IRQ_NESTED_THREAD;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ desc->istate &= ~IRQS_DISABLED;
+ irq_compat_clr_disabled(desc);
}
-EXPORT_SYMBOL_GPL(set_irq_nested_thread);
-/*
- * default enable function
- */
-static void default_enable(struct irq_data *data)
+static void irq_state_set_disabled(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ desc->istate |= IRQS_DISABLED;
+ irq_compat_set_disabled(desc);
+}
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
- desc->status &= ~IRQ_MASKED;
+static void irq_state_clr_masked(struct irq_desc *desc)
+{
+ desc->istate &= ~IRQS_MASKED;
+ irq_compat_clr_masked(desc);
}
-/*
- * default disable function
- */
-static void default_disable(struct irq_data *data)
+static void irq_state_set_masked(struct irq_desc *desc)
{
+ desc->istate |= IRQS_MASKED;
+ irq_compat_set_masked(desc);
}
-/*
- * default startup function
- */
-static unsigned int default_startup(struct irq_data *data)
+int irq_startup(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ irq_state_clr_disabled(desc);
+ desc->depth = 0;
+
+ if (desc->irq_data.chip->irq_startup) {
+ int ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_state_clr_masked(desc);
+ return ret;
+ }
- desc->irq_data.chip->irq_enable(data);
+ irq_enable(desc);
return 0;
}
-/*
- * default shutdown function
- */
-static void default_shutdown(struct irq_data *data)
+void irq_shutdown(struct irq_desc *desc)
{
- struct irq_desc *desc = irq_data_to_desc(data);
+ irq_state_set_disabled(desc);
+ desc->depth = 1;
+ if (desc->irq_data.chip->irq_shutdown)
+ desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+ if (desc->irq_data.chip->irq_disable)
+ desc->irq_data.chip->irq_disable(&desc->irq_data);
+ else
+ desc->irq_data.chip->irq_mask(&desc->irq_data);
+ irq_state_set_masked(desc);
+}
- desc->irq_data.chip->irq_mask(&desc->irq_data);
- desc->status |= IRQ_MASKED;
+void irq_enable(struct irq_desc *desc)
+{
+ irq_state_clr_disabled(desc);
+ if (desc->irq_data.chip->irq_enable)
+ desc->irq_data.chip->irq_enable(&desc->irq_data);
+ else
+ desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ irq_state_clr_masked(desc);
+}
+
+void irq_disable(struct irq_desc *desc)
+{
+ irq_state_set_disabled(desc);
+ if (desc->irq_data.chip->irq_disable) {
+ desc->irq_data.chip->irq_disable(&desc->irq_data);
+ irq_state_set_masked(desc);
+ }
}
#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
@@ -315,10 +288,6 @@ static void compat_bus_sync_unlock(struct irq_data *data)
void irq_chip_set_defaults(struct irq_chip *chip)
{
#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
- /*
- * Compat fixup functions need to be before we set the
- * defaults for enable/disable/startup/shutdown
- */
if (chip->enable)
chip->irq_enable = compat_irq_enable;
if (chip->disable)
@@ -327,33 +296,8 @@ void irq_chip_set_defaults(struct irq_chip *chip)
chip->irq_shutdown = compat_irq_shutdown;
if (chip->startup)
chip->irq_startup = compat_irq_startup;
-#endif
- /*
- * The real defaults
- */
- if (!chip->irq_enable)
- chip->irq_enable = default_enable;
- if (!chip->irq_disable)
- chip->irq_disable = default_disable;
- if (!chip->irq_startup)
- chip->irq_startup = default_startup;
- /*
- * We use chip->irq_disable, when the user provided its own. When
- * we have default_disable set for chip->irq_disable, then we need
- * to use default_shutdown, otherwise the irq line is not
- * disabled on free_irq():
- */
- if (!chip->irq_shutdown)
- chip->irq_shutdown = chip->irq_disable != default_disable ?
- chip->irq_disable : default_shutdown;
-
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
if (!chip->end)
chip->end = dummy_irq_chip.end;
-
- /*
- * Now fix up the remaining compat handlers
- */
if (chip->bus_lock)
chip->irq_bus_lock = compat_bus_lock;
if (chip->bus_sync_unlock)
@@ -388,22 +332,22 @@ static inline void mask_ack_irq(struct irq_desc *desc)
if (desc->irq_data.chip->irq_ack)
desc->irq_data.chip->irq_ack(&desc->irq_data);
}
- desc->status |= IRQ_MASKED;
+ irq_state_set_masked(desc);
}
-static inline void mask_irq(struct irq_desc *desc)
+void mask_irq(struct irq_desc *desc)
{
if (desc->irq_data.chip->irq_mask) {
desc->irq_data.chip->irq_mask(&desc->irq_data);
- desc->status |= IRQ_MASKED;
+ irq_state_set_masked(desc);
}
}
-static inline void unmask_irq(struct irq_desc *desc)
+void unmask_irq(struct irq_desc *desc)
{
if (desc->irq_data.chip->irq_unmask) {
desc->irq_data.chip->irq_unmask(&desc->irq_data);
- desc->status &= ~IRQ_MASKED;
+ irq_state_clr_masked(desc);
}
}
@@ -428,10 +372,11 @@ void handle_nested_irq(unsigned int irq)
kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
+ irq_compat_set_progress(desc);
+ desc->istate |= IRQS_INPROGRESS;
raw_spin_unlock_irq(&desc->lock);
action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -439,13 +384,21 @@ void handle_nested_irq(unsigned int irq)
note_interrupt(irq, desc, action_ret);
raw_spin_lock_irq(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
+ desc->istate &= ~IRQS_INPROGRESS;
+ irq_compat_clr_progress(desc);
out_unlock:
raw_spin_unlock_irq(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_nested_irq);
+static bool irq_check_poll(struct irq_desc *desc)
+{
+ if (!(desc->istate & IRQS_POLL_INPROGRESS))
+ return false;
+ return irq_wait_for_poll(desc);
+}
+
/**
* handle_simple_irq - Simple and software-decoded IRQs.
* @irq: the interrupt number
@@ -461,29 +414,20 @@ EXPORT_SYMBOL_GPL(handle_nested_irq);
void
handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out_unlock;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out_unlock;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
- raw_spin_unlock(&desc->lock);
+ handle_irq_event(desc);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
-
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -501,42 +445,42 @@ out_unlock:
void
handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
mask_ack_irq(desc);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out_unlock;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out_unlock;
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If its disabled or no action available
* keep it masked and get out of here
*/
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
goto out_unlock;
- desc->status |= IRQ_INPROGRESS;
- raw_spin_unlock(&desc->lock);
-
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ handle_irq_event(desc);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
-
- if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
+ if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
unmask_irq(desc);
out_unlock:
raw_spin_unlock(&desc->lock);
}
EXPORT_SYMBOL_GPL(handle_level_irq);
+#ifdef CONFIG_IRQ_PREFLOW_FASTEOI
+static inline void preflow_handler(struct irq_desc *desc)
+{
+ if (desc->preflow_handler)
+ desc->preflow_handler(&desc->irq_data);
+}
+#else
+static inline void preflow_handler(struct irq_desc *desc) { }
+#endif
+
/**
* handle_fasteoi_irq - irq handler for transparent controllers
* @irq: the interrupt number
@@ -550,42 +494,41 @@ EXPORT_SYMBOL_GPL(handle_level_irq);
void
handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
- struct irqaction *action;
- irqreturn_t action_ret;
-
raw_spin_lock(&desc->lock);
- if (unlikely(desc->status & IRQ_INPROGRESS))
- goto out;
+ if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (!irq_check_poll(desc))
+ goto out;
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
/*
* If its disabled or no action available
* then mask it and get out of here:
*/
- action = desc->action;
- if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
- desc->status |= IRQ_PENDING;
+ if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
mask_irq(desc);
goto out;
}
- desc->status |= IRQ_INPROGRESS;
- desc->status &= ~IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
+ if (desc->istate & IRQS_ONESHOT)
+ mask_irq(desc);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ preflow_handler(desc);
+ handle_irq_event(desc);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_INPROGRESS;
-out:
+out_eoi:
desc->irq_data.chip->irq_eoi(&desc->irq_data);
-
+out_unlock:
raw_spin_unlock(&desc->lock);
+ return;
+out:
+ if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED))
+ goto out_eoi;
+ goto out_unlock;
}
/**
@@ -609,32 +552,28 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
- desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
-
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
/*
* If we're currently running this IRQ, or its disabled,
* we shouldn't process the IRQ. Mark it pending, handle
* the necessary masking and go out
*/
- if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
- !desc->action)) {
- desc->status |= (IRQ_PENDING | IRQ_MASKED);
- mask_ack_irq(desc);
- goto out_unlock;
+ if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
+ !desc->action))) {
+ if (!irq_check_poll(desc)) {
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ mask_ack_irq(desc);
+ goto out_unlock;
+ }
}
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->irq_data.chip->irq_ack(&desc->irq_data);
- /* Mark the IRQ currently in progress.*/
- desc->status |= IRQ_INPROGRESS;
-
do {
- struct irqaction *action = desc->action;
- irqreturn_t action_ret;
-
- if (unlikely(!action)) {
+ if (unlikely(!desc->action)) {
mask_irq(desc);
goto out_unlock;
}
@@ -644,22 +583,17 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
* one, we could have masked the irq.
* Renable it, if it was not disabled in meantime.
*/
- if (unlikely((desc->status &
- (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
- (IRQ_PENDING | IRQ_MASKED))) {
- unmask_irq(desc);
+ if (unlikely(desc->istate & IRQS_PENDING)) {
+ if (!(desc->istate & IRQS_DISABLED) &&
+ (desc->istate & IRQS_MASKED))
+ unmask_irq(desc);
}
- desc->status &= ~IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
- action_ret = handle_IRQ_event(irq, action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
- raw_spin_lock(&desc->lock);
+ handle_irq_event(desc);
- } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
+ } while ((desc->istate & IRQS_PENDING) &&
+ !(desc->istate & IRQS_DISABLED));
- desc->status &= ~IRQ_INPROGRESS;
out_unlock:
raw_spin_unlock(&desc->lock);
}
@@ -674,103 +608,84 @@ out_unlock:
void
handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
{
- irqreturn_t action_ret;
+ struct irq_chip *chip = irq_desc_get_chip(desc);
kstat_incr_irqs_this_cpu(irq, desc);
- if (desc->irq_data.chip->irq_ack)
- desc->irq_data.chip->irq_ack(&desc->irq_data);
+ if (chip->irq_ack)
+ chip->irq_ack(&desc->irq_data);
- action_ret = handle_IRQ_event(irq, desc->action);
- if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ handle_irq_event_percpu(desc, desc->action);
- if (desc->irq_data.chip->irq_eoi)
- desc->irq_data.chip->irq_eoi(&desc->irq_data);
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
}
void
-__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
+__irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
const char *name)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
- if (!desc) {
- printk(KERN_ERR
- "Trying to install type control for IRQ%d\n", irq);
+ if (!desc)
return;
- }
- if (!handle)
+ if (!handle) {
handle = handle_bad_irq;
- else if (desc->irq_data.chip == &no_irq_chip) {
- printk(KERN_WARNING "Trying to install %sinterrupt handler "
- "for IRQ%d\n", is_chained ? "chained " : "", irq);
- /*
- * Some ARM implementations install a handler for really dumb
- * interrupt hardware without setting an irq_chip. This worked
- * with the ARM no_irq_chip but the check in setup_irq would
- * prevent us to setup the interrupt at all. Switch it to
- * dummy_irq_chip for easy transition.
- */
- desc->irq_data.chip = &dummy_irq_chip;
+ } else {
+ if (WARN_ON(desc->irq_data.chip == &no_irq_chip))
+ goto out;
}
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
-
/* Uninstall? */
if (handle == handle_bad_irq) {
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
- desc->status |= IRQ_DISABLED;
+ irq_compat_set_disabled(desc);
+ desc->istate |= IRQS_DISABLED;
desc->depth = 1;
}
desc->handle_irq = handle;
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
- desc->status &= ~IRQ_DISABLED;
- desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
- desc->depth = 0;
- desc->irq_data.chip->irq_startup(&desc->irq_data);
+ irq_settings_set_noprobe(desc);
+ irq_settings_set_norequest(desc);
+ irq_startup(desc);
}
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
-}
-EXPORT_SYMBOL_GPL(__set_irq_handler);
-
-void
-set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
- irq_flow_handler_t handle)
-{
- set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0, NULL);
+out:
+ irq_put_desc_busunlock(desc, flags);
}
+EXPORT_SYMBOL_GPL(__irq_set_handler);
void
-set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
+irq_set_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
irq_flow_handler_t handle, const char *name)
{
- set_irq_chip(irq, chip);
- __set_irq_handler(irq, handle, 0, name);
+ irq_set_chip(irq, chip);
+ __irq_set_handler(irq, handle, 0, name);
}
void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
if (!desc)
return;
+ irq_settings_clr_and_set(desc, clr, set);
+
+ irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |
+ IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT);
+ if (irq_settings_has_no_balance_set(desc))
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ if (irq_settings_is_per_cpu(desc))
+ irqd_set(&desc->irq_data, IRQD_PER_CPU);
+ if (irq_settings_can_move_pcntxt(desc))
+ irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
- /* Sanitize flags */
- set &= IRQF_MODIFY_MASK;
- clr &= IRQF_MODIFY_MASK;
+ irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
- raw_spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~clr;
- desc->status |= set;
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_unlock(desc, flags);
}
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
new file mode 100644
index 000000000000..6bbaf66aca85
--- /dev/null
+++ b/kernel/irq/compat.h
@@ -0,0 +1,72 @@
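+/*
+ * Compat layer for the transition period.
+ *
+ * Unless CONFIG_GENERIC_HARDIRQS_NO_COMPAT is defined, each helper below
+ * sets or clears the corresponding legacy IRQ_* bit in desc->status.
+ * With that option defined, the helpers compile to empty stubs.
+ */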
+#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+static inline void irq_compat_set_progress(struct irq_desc *desc)
+{
+ desc->status |= IRQ_INPROGRESS;
+}
+
+static inline void irq_compat_clr_progress(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_INPROGRESS;
+}
+static inline void irq_compat_set_disabled(struct irq_desc *desc)
+{
+ desc->status |= IRQ_DISABLED;
+}
+static inline void irq_compat_clr_disabled(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_DISABLED;
+}
+static inline void irq_compat_set_pending(struct irq_desc *desc)
+{
+ desc->status |= IRQ_PENDING;
+}
+
+static inline void irq_compat_clr_pending(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_PENDING;
+}
+static inline void irq_compat_set_masked(struct irq_desc *desc)
+{
+ desc->status |= IRQ_MASKED;
+}
+
+static inline void irq_compat_clr_masked(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_MASKED;
+}
+static inline void irq_compat_set_move_pending(struct irq_desc *desc)
+{
+ desc->status |= IRQ_MOVE_PENDING;
+}
+
+static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_MOVE_PENDING;
+}
+static inline void irq_compat_set_affinity(struct irq_desc *desc)
+{
+ desc->status |= IRQ_AFFINITY_SET;
+}
+
+static inline void irq_compat_clr_affinity(struct irq_desc *desc)
+{
+ desc->status &= ~IRQ_AFFINITY_SET;
+}
+#else
+static inline void irq_compat_set_progress(struct irq_desc *desc) { }
+static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
+static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
+static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
+static inline void irq_compat_set_pending(struct irq_desc *desc) { }
+static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
+static inline void irq_compat_set_masked(struct irq_desc *desc) { }
+static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
+static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
+static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
+static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
+static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
+#endif
+
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
new file mode 100644
index 000000000000..d1a33b7fa61d
--- /dev/null
+++ b/kernel/irq/debug.h
@@ -0,0 +1,40 @@
+/*
+ * Debugging printout:
+ */
+
+#include <linux/kallsyms.h>
+
+/*
+ * P(f)  - print "<name of f> set" if @f is set in desc->status
+ * PS(f) - print "<name of f> set" if @f is set in desc->istate
+ *
+ * Both macros expect a local variable named 'desc' at the expansion
+ * site.  The argument is parenthesised and the body is wrapped in
+ * do { } while (0) so that every expansion is exactly one statement and
+ * can never capture a following 'else'.
+ */
+#define P(f)							\
+	do {							\
+		if (desc->status & (f))				\
+			printk("%14s set\n", #f);		\
+	} while (0)
+
+#define PS(f)							\
+	do {							\
+		if (desc->istate & (f))				\
+			printk("%14s set\n", #f);		\
+	} while (0)
+
+/*
+ * print_irq_desc - dump an interrupt descriptor to the kernel log
+ * @irq:	interrupt number, printed as part of the first line
+ * @desc:	descriptor to dump; dereferenced unconditionally
+ *
+ * Prints the depth and counters, the flow handler, the chip, the first
+ * action and its handler (if an action is installed), followed by one
+ * line for each listed flag that is currently set.
+ */
+static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+	/* @irq is unsigned, so print it with %u */
+	printk("irq %u, desc: %p, depth: %d, count: %d, unhandled: %d\n",
+		irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
+	printk("->handle_irq():  %p, ", desc->handle_irq);
+	print_symbol("%s\n", (unsigned long)desc->handle_irq);
+	printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
+	print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
+	printk("->action(): %p\n", desc->action);
+	if (desc->action) {
+		printk("->action->handler(): %p, ", desc->action->handler);
+		print_symbol("%s\n", (unsigned long)desc->action->handler);
+	}
+
+	/* Flags tested against desc->status */
+	P(IRQ_LEVEL);
+	P(IRQ_PER_CPU);
+	P(IRQ_NOPROBE);
+	P(IRQ_NOREQUEST);
+	P(IRQ_NOAUTOEN);
+
+	/* Flags tested against desc->istate */
+	PS(IRQS_AUTODETECT);
+	PS(IRQS_INPROGRESS);
+	PS(IRQS_REPLAY);
+	PS(IRQS_WAITING);
+	PS(IRQS_DISABLED);
+	PS(IRQS_PENDING);
+	PS(IRQS_MASKED);
+}
+
+#undef P
+#undef PS
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3540a7190122..517561fc7317 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -51,30 +51,92 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
"but no thread function available.", irq, action->name);
}
-/**
- * handle_IRQ_event - irq action chain handler
- * @irq: the interrupt number
- * @action: the interrupt action chain for this irq
- *
- * Handles the action chain of an irq event
- */
-irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
+static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
+{
+ /*
+ * Wake up the handler thread for this action. In case the
+ * thread crashed and was killed we just pretend that we
+ * handled the interrupt. The hardirq handler has disabled the
+ * device interrupt, so no irq storm is lurking. If the
+ * RUNTHREAD bit is already set, nothing to do.
+ */
+ if (test_bit(IRQTF_DIED, &action->thread_flags) ||
+ test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ return;
+
+ /*
+ * It's safe to OR the mask lockless here. We have only two
+ * places which write to threads_oneshot: This code and the
+ * irq thread.
+ *
+ * This code is the hard irq context and can never run on two
+ * cpus in parallel. If it ever does we have more serious
+ * problems than this bitmask.
+ *
+ * The irq threads of this irq which clear their "running" bit
+ * in threads_oneshot are serialized via desc->lock against
+ * each other and they are serialized against this code by
+ * IRQS_INPROGRESS.
+ *
+ * Hard irq handler (pseudo code, see handle_irq_event()):
+ *
+ * spin_lock(desc->lock);
+ * desc->istate |= IRQS_INPROGRESS;
+ * spin_unlock(desc->lock);
+ * set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ * desc->threads_oneshot |= mask;
+ * spin_lock(desc->lock);
+ * desc->istate &= ~IRQS_INPROGRESS;
+ * spin_unlock(desc->lock);
+ *
+ * irq thread (pseudo code):
+ *
+ * again:
+ * spin_lock(desc->lock);
+ * if (desc->istate & IRQS_INPROGRESS) {
+ * spin_unlock(desc->lock);
+ * while(desc->istate & IRQS_INPROGRESS)
+ * cpu_relax();
+ * goto again;
+ * }
+ * if (!test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ * desc->threads_oneshot &= ~mask;
+ * spin_unlock(desc->lock);
+ *
+ * So either the thread waits for us to clear IRQS_INPROGRESS
+ * or we are waiting in the flow handler for desc->lock to be
+ * released before we reach this point. The thread also checks
+ * IRQTF_RUNTHREAD under desc->lock. If set it leaves
+ * threads_oneshot untouched and runs the thread another time.
+ */
+ desc->threads_oneshot |= action->thread_mask;
+ wake_up_process(action->thread);
+}
+
+irqreturn_t
+handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
{
- irqreturn_t ret, retval = IRQ_NONE;
- unsigned int status = 0;
+ irqreturn_t retval = IRQ_NONE;
+ unsigned int random = 0, irq = desc->irq_data.irq;
do {
+ irqreturn_t res;
+
trace_irq_handler_entry(irq, action);
- ret = action->handler(irq, action->dev_id);
- trace_irq_handler_exit(irq, action, ret);
+ res = action->handler(irq, action->dev_id);
+ trace_irq_handler_exit(irq, action, res);
- switch (ret) {
+ if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
+ irq, action->handler))
+ local_irq_disable();
+
+ switch (res) {
case IRQ_WAKE_THREAD:
/*
* Set result to handled so the spurious check
* does not trigger.
*/
- ret = IRQ_HANDLED;
+ res = IRQ_HANDLED;
/*
* Catch drivers which return WAKE_THREAD but
@@ -85,36 +147,56 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
break;
}
- /*
- * Wake up the handler thread for this
- * action. In case the thread crashed and was
- * killed we just pretend that we handled the
- * interrupt. The hardirq handler above has
- * disabled the device interrupt, so no irq
- * storm is lurking.
- */
- if (likely(!test_bit(IRQTF_DIED,
- &action->thread_flags))) {
- set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
- wake_up_process(action->thread);
- }
+ irq_wake_thread(desc, action);
/* Fall through to add to randomness */
case IRQ_HANDLED:
- status |= action->flags;
+ random |= action->flags;
break;
default:
break;
}
- retval |= ret;
+ retval |= res;
action = action->next;
} while (action);
- if (status & IRQF_SAMPLE_RANDOM)
+ if (random & IRQF_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
- local_irq_disable();
+ if (!noirqdebug)
+ note_interrupt(irq, desc, retval);
return retval;
}
+
+/*
+ * handle_irq_event - run the action chain of @desc with desc->lock dropped
+ * @desc:	interrupt descriptor; desc->lock must be held on entry
+ *
+ * Clears the pending state and marks the interrupt in progress, releases
+ * desc->lock while handle_irq_event_percpu() runs the actions, then
+ * retakes the lock and clears the in-progress state.  desc->lock is held
+ * again when this function returns.
+ *
+ * Returns the value of handle_irq_event_percpu().
+ */
+irqreturn_t handle_irq_event(struct irq_desc *desc)
+{
+	/* Sample the action chain while desc->lock is still held */
+	struct irqaction *chain = desc->action;
+	irqreturn_t retval;
+
+	/* Legacy desc->status bookkeeping (no-op without compat) */
+	irq_compat_clr_pending(desc);
+	irq_compat_set_progress(desc);
+
+	/* Core state: no longer pending, now in progress */
+	desc->istate = (desc->istate & ~IRQS_PENDING) | IRQS_INPROGRESS;
+
+	raw_spin_unlock(&desc->lock);
+	retval = handle_irq_event_percpu(desc, chain);
+	raw_spin_lock(&desc->lock);
+
+	desc->istate &= ~IRQS_INPROGRESS;
+	irq_compat_clr_progress(desc);
+
+	return retval;
+}
+
+/**
+ * handle_IRQ_event - irq action chain handler
+ * @irq:	the interrupt number
+ * @action:	the interrupt action chain for this irq
+ *
+ * Handles the action chain of an irq event by looking up the descriptor
+ * of @irq and passing it, together with @action, to
+ * handle_irq_event_percpu().
+ *
+ * Returns the result of handle_irq_event_percpu(), or IRQ_NONE (after
+ * emitting a warning) if @irq has no descriptor.
+ */
+irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	/*
+	 * irq_to_desc() may return NULL, and handle_irq_event_percpu()
+	 * dereferences the descriptor unconditionally.
+	 */
+	if (WARN_ON(!desc))
+		return IRQ_NONE;
+
+	return handle_irq_event_percpu(desc, action);
+}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4571ae7e085a..6c6ec9a49027 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -1,27 +1,101 @@
/*
* IRQ subsystem internal functions and variables:
+ *
+ * Do not ever include this file from anything else than
+ * kernel/irq/. Do not even think about using any information outside
+ * of this file for your non core code.
*/
#include <linux/irqdesc.h>
+#ifdef CONFIG_SPARSE_IRQ
+# define IRQ_BITMAP_BITS (NR_IRQS + 8196)
+#else
+# define IRQ_BITMAP_BITS NR_IRQS
+#endif
+
+#define istate core_internal_state__do_not_mess_with_it
+
+#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
+# define status status_use_accessors
+#endif
+
extern int noirqdebug;
+/*
+ * Bits used by threaded handlers:
+ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ * IRQTF_DIED - handler thread died
+ * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ * IRQTF_AFFINITY - irq thread is requested to adjust affinity
+ * IRQTF_FORCED_THREAD - irq action is force threaded
+ */
+enum {
+ IRQTF_RUNTHREAD,
+ IRQTF_DIED,
+ IRQTF_WARNED,
+ IRQTF_AFFINITY,
+ IRQTF_FORCED_THREAD,
+};
+
+/*
+ * Bit masks for desc->istate (the core internal state word)
+ *
+ * IRQS_AUTODETECT - autodetection in progress
+ * IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
+ * detection
+ * IRQS_POLL_INPROGRESS - polling in progress
+ * IRQS_INPROGRESS - Interrupt in progress
+ * IRQS_ONESHOT - irq is not unmasked in primary handler
+ * IRQS_REPLAY - irq is replayed
+ * IRQS_WAITING - irq is waiting
+ * IRQS_DISABLED - irq is disabled
+ * IRQS_PENDING - irq is pending and replayed later
+ * IRQS_MASKED - irq is masked
+ * IRQS_SUSPENDED - irq is suspended
+ */
+enum {
+ IRQS_AUTODETECT = 0x00000001,
+ IRQS_SPURIOUS_DISABLED = 0x00000002,
+ IRQS_POLL_INPROGRESS = 0x00000008,
+ IRQS_INPROGRESS = 0x00000010,
+ IRQS_ONESHOT = 0x00000020,
+ IRQS_REPLAY = 0x00000040,
+ IRQS_WAITING = 0x00000080,
+ IRQS_DISABLED = 0x00000100,
+ IRQS_PENDING = 0x00000200,
+ IRQS_MASKED = 0x00000400,
+ IRQS_SUSPENDED = 0x00000800,
+};
+
+#include "compat.h"
+#include "debug.h"
+#include "settings.h"
+
#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
/* Set default functions for irq_chip structures: */
extern void irq_chip_set_defaults(struct irq_chip *chip);
-/* Set default handler: */
-extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
-
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
+extern int irq_startup(struct irq_desc *desc);
+extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_enable(struct irq_desc *desc);
+extern void irq_disable(struct irq_desc *desc);
+extern void mask_irq(struct irq_desc *desc);
+extern void unmask_irq(struct irq_desc *desc);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
+irqreturn_t handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action);
+irqreturn_t handle_irq_event(struct irq_desc *desc);
+
/* Resending of interrupts :*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq);
+bool irq_wait_for_poll(struct irq_desc *desc);
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
@@ -37,20 +111,10 @@ static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
-extern int irq_select_affinity_usr(unsigned int irq);
+extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
extern void irq_set_thread_affinity(struct irq_desc *desc);
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-static inline void irq_end(unsigned int irq, struct irq_desc *desc)
-{
- if (desc->irq_data.chip && desc->irq_data.chip->end)
- desc->irq_data.chip->end(irq);
-}
-#else
-static inline void irq_end(unsigned int irq, struct irq_desc *desc) { }
-#endif
-
/* Inline functions for support of irq chips on slow busses */
static inline void chip_bus_lock(struct irq_desc *desc)
{
@@ -64,43 +128,60 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data);
}
+struct irq_desc *
+__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus);
+void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus);
+
+static inline struct irq_desc *
+irq_get_desc_buslock(unsigned int irq, unsigned long *flags)
+{
+ return __irq_get_desc_lock(irq, flags, true);
+}
+
+static inline void
+irq_put_desc_busunlock(struct irq_desc *desc, unsigned long flags)
+{
+ __irq_put_desc_unlock(desc, flags, true);
+}
+
+static inline struct irq_desc *
+irq_get_desc_lock(unsigned int irq, unsigned long *flags)
+{
+ return __irq_get_desc_lock(irq, flags, false);
+}
+
+static inline void
+irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
+{
+ __irq_put_desc_unlock(desc, flags, false);
+}
+
/*
- * Debugging printout:
+ * Manipulation functions for irq_data.state
*/
+static inline void irqd_set_move_pending(struct irq_data *d)
+{
+ d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
+ irq_compat_set_move_pending(irq_data_to_desc(d));
+}
-#include <linux/kallsyms.h>
-
-#define P(f) if (desc->status & f) printk("%14s set\n", #f)
+static inline void irqd_clr_move_pending(struct irq_data *d)
+{
+ d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
+ irq_compat_clr_move_pending(irq_data_to_desc(d));
+}
-static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
+static inline void irqd_clear(struct irq_data *d, unsigned int mask)
{
- printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
- irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
- printk("->handle_irq(): %p, ", desc->handle_irq);
- print_symbol("%s\n", (unsigned long)desc->handle_irq);
- printk("->irq_data.chip(): %p, ", desc->irq_data.chip);
- print_symbol("%s\n", (unsigned long)desc->irq_data.chip);
- printk("->action(): %p\n", desc->action);
- if (desc->action) {
- printk("->action->handler(): %p, ", desc->action->handler);
- print_symbol("%s\n", (unsigned long)desc->action->handler);
- }
-
- P(IRQ_INPROGRESS);
- P(IRQ_DISABLED);
- P(IRQ_PENDING);
- P(IRQ_REPLAY);
- P(IRQ_AUTODETECT);
- P(IRQ_WAITING);
- P(IRQ_LEVEL);
- P(IRQ_MASKED);
-#ifdef CONFIG_IRQ_PER_CPU
- P(IRQ_PER_CPU);
-#endif
- P(IRQ_NOPROBE);
- P(IRQ_NOREQUEST);
- P(IRQ_NOAUTOEN);
+ d->state_use_accessors &= ~mask;
}
-#undef P
+static inline void irqd_set(struct irq_data *d, unsigned int mask)
+{
+ d->state_use_accessors |= mask;
+}
+static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
+{
+ return d->state_use_accessors & mask;
+}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 282f20230e67..dbccc799407f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -79,7 +79,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_data.chip_data = NULL;
desc->irq_data.handler_data = NULL;
desc->irq_data.msi_desc = NULL;
- desc->status = IRQ_DEFAULT_INIT_FLAGS;
+ irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
+ desc->istate = IRQS_DISABLED;
desc->handle_irq = handle_bad_irq;
desc->depth = 1;
desc->irq_count = 0;
@@ -94,7 +95,7 @@ int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
#ifdef CONFIG_SPARSE_IRQ
@@ -206,6 +207,14 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
return NULL;
}
+/*
+ * Set nr_irqs to @nr, provided @nr does not exceed the size of the
+ * allocation bitmap (IRQ_BITMAP_BITS).
+ *
+ * Returns 0 on success, -ENOMEM if @nr is too large; nr_irqs is left
+ * unchanged in that case.
+ */
+static int irq_expand_nr_irqs(unsigned int nr)
+{
+	if (nr <= IRQ_BITMAP_BITS) {
+		nr_irqs = nr;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
int __init early_irq_init(void)
{
int i, initcnt, node = first_online_node;
@@ -217,6 +226,15 @@ int __init early_irq_init(void)
initcnt = arch_probe_nr_irqs();
printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
+ if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
+ nr_irqs = IRQ_BITMAP_BITS;
+
+ if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
+ initcnt = IRQ_BITMAP_BITS;
+
+ if (initcnt > nr_irqs)
+ nr_irqs = initcnt;
+
for (i = 0; i < initcnt; i++) {
desc = alloc_desc(i, node);
set_bit(i, allocated_irqs);
@@ -229,7 +247,7 @@ int __init early_irq_init(void)
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
- .status = IRQ_DEFAULT_INIT_FLAGS,
+ .istate = IRQS_DISABLED,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -251,8 +269,8 @@ int __init early_irq_init(void)
for (i = 0; i < count; i++) {
desc[i].irq_data.irq = i;
desc[i].irq_data.chip = &no_irq_chip;
- /* TODO : do this allocation on-demand ... */
desc[i].kstat_irqs = alloc_percpu(unsigned int);
+ irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
alloc_masks(desc + i, GFP_KERNEL, node);
desc_smp_init(desc + i, node);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -277,24 +295,14 @@ static void free_desc(unsigned int irq)
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
-#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
- struct irq_desc *desc;
- unsigned int i;
-
- for (i = 0; i < cnt; i++) {
- desc = irq_to_desc(start + i);
- if (desc && !desc->kstat_irqs) {
- unsigned int __percpu *stats = alloc_percpu(unsigned int);
-
- if (!stats)
- return -1;
- if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
- free_percpu(stats);
- }
- }
-#endif
return start;
}
+
+static int irq_expand_nr_irqs(unsigned int nr)
+{
+ return -ENOMEM;
+}
+
#endif /* !CONFIG_SPARSE_IRQ */
/* Dynamic interrupt handling */
@@ -338,14 +346,17 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
mutex_lock(&sparse_irq_lock);
- start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
+ start = bitmap_find_next_zero_area(allocated_irqs, IRQ_BITMAP_BITS,
+ from, cnt, 0);
ret = -EEXIST;
if (irq >=0 && start != irq)
goto err;
- ret = -ENOMEM;
- if (start >= nr_irqs)
- goto err;
+ if (start + cnt > nr_irqs) {
+ ret = irq_expand_nr_irqs(start + cnt);
+ if (ret)
+ goto err;
+ }
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
@@ -392,6 +403,26 @@ unsigned int irq_get_next_irq(unsigned int offset)
return find_next_bit(allocated_irqs, nr_irqs, offset);
}
+/*
+ * __irq_get_desc_lock - look up the descriptor of @irq and lock it
+ * @irq:	interrupt number
+ * @flags:	where the saved interrupt flags are stored
+ * @bus:	also take the chip bus lock (before desc->lock)
+ *
+ * Returns the locked descriptor, or NULL if @irq has no descriptor, in
+ * which case no lock is taken and *@flags is not written.
+ */
+struct irq_desc *
+__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (!desc)
+		return NULL;
+
+	if (bus)
+		chip_bus_lock(desc);
+	raw_spin_lock_irqsave(&desc->lock, *flags);
+
+	return desc;
+}
+
+/*
+ * __irq_put_desc_unlock - release the locks of a descriptor
+ * @desc:	descriptor to unlock; dereferenced unconditionally
+ * @flags:	interrupt flags to restore
+ * @bus:	also release the chip bus lock (after desc->lock)
+ */
+void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus)
+{
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	if (!bus)
+		return;
+
+	chip_bus_sync_unlock(desc);
+}
+
/**
* dynamic_irq_cleanup - cleanup a dynamically allocated irq
* @irq: irq number to initialize
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f747dd..acd599a43bfb 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,17 @@
#include "internals.h"
+#ifdef CONFIG_IRQ_FORCED_THREADING
+__read_mostly bool force_irqthreads;
+
+static int __init setup_forced_irqthreads(char *arg)
+{
+ force_irqthreads = true;
+ return 0;
+}
+early_param("threadirqs", setup_forced_irqthreads);
+#endif
+
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -30,7 +41,7 @@
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned int status;
+ unsigned int state;
if (!desc)
return;
@@ -42,16 +53,16 @@ void synchronize_irq(unsigned int irq)
* Wait until we're out of the critical section. This might
* give the wrong answer due to the lack of memory barriers.
*/
- while (desc->status & IRQ_INPROGRESS)
+ while (desc->istate & IRQS_INPROGRESS)
cpu_relax();
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
- status = desc->status;
+ state = desc->istate;
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
- } while (status & IRQ_INPROGRESS);
+ } while (state & IRQS_INPROGRESS);
/*
* We made sure that no hardirq handler is running. Now verify
@@ -73,8 +84,8 @@ int irq_can_set_affinity(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- if (CHECK_IRQ_PER_CPU(desc->status) || !desc->irq_data.chip ||
- !desc->irq_data.chip->irq_set_affinity)
+ if (!desc || !irqd_can_balance(&desc->irq_data) ||
+ !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
return 0;
return 1;
@@ -100,67 +111,169 @@ void irq_set_thread_affinity(struct irq_desc *desc)
}
}
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
+{
+ return irq_settings_can_move_pcntxt(desc);
+}
+static inline bool irq_move_pending(struct irq_desc *desc)
+{
+ return irqd_is_setaffinity_pending(&desc->irq_data);
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+ cpumask_copy(desc->pending_mask, mask);
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+ cpumask_copy(mask, desc->pending_mask);
+}
+#else
+static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
+static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
+#endif
+
/**
* irq_set_affinity - Set the irq affinity of a given irq
* @irq: Interrupt to set affinity
* @cpumask: cpumask
*
*/
-int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_chip *chip = desc->irq_data.chip;
unsigned long flags;
+ int ret = 0;
if (!chip->irq_set_affinity)
return -EINVAL;
raw_spin_lock_irqsave(&desc->lock, flags);
-#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PCNTXT) {
- if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
- cpumask_copy(desc->irq_data.affinity, cpumask);
+ if (irq_can_move_pcntxt(desc)) {
+ ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ cpumask_copy(desc->irq_data.affinity, mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
irq_set_thread_affinity(desc);
+ ret = 0;
}
+ } else {
+ irqd_set_move_pending(&desc->irq_data);
+ irq_copy_pending(desc, mask);
}
- else {
- desc->status |= IRQ_MOVE_PENDING;
- cpumask_copy(desc->pending_mask, cpumask);
- }
-#else
- if (!chip->irq_set_affinity(&desc->irq_data, cpumask, false)) {
- cpumask_copy(desc->irq_data.affinity, cpumask);
- irq_set_thread_affinity(desc);
+
+ if (desc->affinity_notify) {
+ kref_get(&desc->affinity_notify->kref);
+ schedule_work(&desc->affinity_notify->work);
}
-#endif
- desc->status |= IRQ_AFFINITY_SET;
+ irq_compat_set_affinity(desc);
+ irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
raw_spin_unlock_irqrestore(&desc->lock, flags);
- return 0;
+ return ret;
}
int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
{
+ unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
+
+ if (!desc)
+ return -EINVAL;
+ desc->affinity_hint = m;
+ irq_put_desc_unlock(desc, flags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
+/*
+ * Work item callback: report the current (or pending) affinity of
+ * notify->irq to the registered notifier, then drop the reference
+ * held on the notifier for this work item.
+ */
+static void irq_affinity_notify(struct work_struct *work)
+{
+	struct irq_affinity_notify *notify;
+	struct irq_desc *desc;
+	cpumask_var_t cpumask;
+	unsigned long flags;
+
+	notify = container_of(work, struct irq_affinity_notify, work);
+	desc = irq_to_desc(notify->irq);
+
+	/* Without a descriptor or a scratch mask there is nothing to report */
+	if (desc && alloc_cpumask_var(&cpumask, GFP_KERNEL)) {
+		/* Snapshot the mask under the descriptor lock */
+		raw_spin_lock_irqsave(&desc->lock, flags);
+		if (!irq_move_pending(desc))
+			cpumask_copy(cpumask, desc->irq_data.affinity);
+		else
+			irq_get_pending(cpumask, desc);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+		/* Invoke the callback outside the lock */
+		notify->notify(notify, cpumask);
+
+		free_cpumask_var(cpumask);
+	}
+
+	kref_put(&notify->kref, notify->release);
+}
+
+/**
+ * irq_set_affinity_notifier - control notification of IRQ affinity changes
+ * @irq: Interrupt for which to enable/disable notification
+ * @notify: Context for notification, or %NULL to disable
+ * notification. Function pointers must be initialised;
+ * the other fields will be initialised by this function.
+ *
+ * Must be called in process context. Notification may only be enabled
+ * after the IRQ is allocated and must be disabled before the IRQ is
+ * freed using free_irq().
+ */
+int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_affinity_notify *old_notify;
unsigned long flags;
+ /* The release function is promised process context */
+ might_sleep();
+
if (!desc)
return -EINVAL;
+ /* Complete initialisation of *notify */
+ if (notify) {
+ notify->irq = irq;
+ kref_init(&notify->kref);
+ INIT_WORK(&notify->work, irq_affinity_notify);
+ }
+
raw_spin_lock_irqsave(&desc->lock, flags);
- desc->affinity_hint = m;
+ old_notify = desc->affinity_notify;
+ desc->affinity_notify = notify;
raw_spin_unlock_irqrestore(&desc->lock, flags);
+ if (old_notify)
+ kref_put(&old_notify->kref, old_notify->release);
+
return 0;
}
-EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
#ifndef CONFIG_AUTO_IRQ_AFFINITY
/*
* Generic version of the affinity autoselector.
*/
-static int setup_affinity(unsigned int irq, struct irq_desc *desc)
+static int
+setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct cpumask *set = irq_default_affinity;
+ int ret;
+
+ /* Excludes PER_CPU and NO_BALANCE interrupts */
if (!irq_can_set_affinity(irq))
return 0;
@@ -168,22 +281,29 @@ static int setup_affinity(unsigned int irq, struct irq_desc *desc)
* Preserve an userspace affinity setup, but make sure that
* one of the targets is online.
*/
- if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
- if (cpumask_any_and(desc->irq_data.affinity, cpu_online_mask)
- < nr_cpu_ids)
- goto set_affinity;
- else
- desc->status &= ~IRQ_AFFINITY_SET;
+ if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
+ if (cpumask_intersects(desc->irq_data.affinity,
+ cpu_online_mask))
+ set = desc->irq_data.affinity;
+ else {
+ irq_compat_clr_affinity(desc);
+ irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
+ }
}
- cpumask_and(desc->irq_data.affinity, cpu_online_mask, irq_default_affinity);
-set_affinity:
- desc->irq_data.chip->irq_set_affinity(&desc->irq_data, desc->irq_data.affinity, false);
-
+ cpumask_and(mask, cpu_online_mask, set);
+ ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ cpumask_copy(desc->irq_data.affinity, mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
+ irq_set_thread_affinity(desc);
+ }
return 0;
}
#else
-static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
+static inline int
+setup_affinity(unsigned int irq, struct irq_desc *d, struct cpumask *mask)
{
return irq_select_affinity(irq);
}
@@ -192,23 +312,21 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
/*
* Called when affinity is set via /proc/irq
*/
-int irq_select_affinity_usr(unsigned int irq)
+int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
int ret;
raw_spin_lock_irqsave(&desc->lock, flags);
- ret = setup_affinity(irq, desc);
- if (!ret)
- irq_set_thread_affinity(desc);
+ ret = setup_affinity(irq, desc, mask);
raw_spin_unlock_irqrestore(&desc->lock, flags);
-
return ret;
}
#else
-static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
+static inline int
+setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
return 0;
}
@@ -219,13 +337,23 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
if (suspend) {
if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
return;
- desc->status |= IRQ_SUSPENDED;
+ desc->istate |= IRQS_SUSPENDED;
}
- if (!desc->depth++) {
- desc->status |= IRQ_DISABLED;
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- }
+ if (!desc->depth++)
+ irq_disable(desc);
+}
+
+/*
+ * Look up and lock the descriptor for @irq, disable the line without
+ * waiting for running handlers, and unlock again.
+ * Returns 0 on success or -EINVAL when no descriptor could be obtained.
+ */
+static int __disable_irq_nosync(unsigned int irq)
+{
+	unsigned long flags;
+	struct irq_desc *desc;
+	int ret = -EINVAL;
+
+	desc = irq_get_desc_buslock(irq, &flags);
+	if (desc) {
+		__disable_irq(desc, irq, false);
+		irq_put_desc_busunlock(desc, flags);
+		ret = 0;
+	}
+	return ret;
}
/**
@@ -241,17 +369,7 @@ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
*/
void disable_irq_nosync(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
- unsigned long flags;
-
- if (!desc)
- return;
-
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
- __disable_irq(desc, irq, false);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
+ __disable_irq_nosync(irq);
}
EXPORT_SYMBOL(disable_irq_nosync);
@@ -269,21 +387,24 @@ EXPORT_SYMBOL(disable_irq_nosync);
*/
void disable_irq(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- if (!desc)
- return;
-
- disable_irq_nosync(irq);
- if (desc->action)
+ if (!__disable_irq_nosync(irq))
synchronize_irq(irq);
}
EXPORT_SYMBOL(disable_irq);
void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
{
- if (resume)
- desc->status &= ~IRQ_SUSPENDED;
+ if (resume) {
+ if (!(desc->istate & IRQS_SUSPENDED)) {
+ if (!desc->action)
+ return;
+ if (!(desc->action->flags & IRQF_FORCE_RESUME))
+ return;
+ /* Pretend that it got disabled ! */
+ desc->depth++;
+ }
+ desc->istate &= ~IRQS_SUSPENDED;
+ }
switch (desc->depth) {
case 0:
@@ -291,12 +412,11 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
break;
case 1: {
- unsigned int status = desc->status & ~IRQ_DISABLED;
-
- if (desc->status & IRQ_SUSPENDED)
+ if (desc->istate & IRQS_SUSPENDED)
goto err_out;
/* Prevent probing on this irq: */
- desc->status = status | IRQ_NOPROBE;
+ irq_settings_set_noprobe(desc);
+ irq_enable(desc);
check_irq_resend(desc, irq);
/* fall-through */
}
@@ -318,21 +438,18 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
*/
void enable_irq(unsigned int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
if (!desc)
return;
+ if (WARN(!desc->irq_data.chip,
+ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
+ goto out;
- if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
- KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
- return;
-
- chip_bus_lock(desc);
- raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, false);
- raw_spin_unlock_irqrestore(&desc->lock, flags);
- chip_bus_sync_unlock(desc);
+out:
+ irq_put_desc_busunlock(desc, flags);
}
EXPORT_SYMBOL(enable_irq);
@@ -348,7 +465,7 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
}
/**
- * set_irq_wake - control irq power management wakeup
+ * irq_set_irq_wake - control irq power management wakeup
* @irq: interrupt to control
* @on: enable/disable power management wakeup
*
@@ -359,23 +476,22 @@ static int set_irq_wake_real(unsigned int irq, unsigned int on)
* Wakeup mode lets this IRQ wake the system from sleep
* states like "suspend to RAM".
*/
-int set_irq_wake(unsigned int irq, unsigned int on)
+int irq_set_irq_wake(unsigned int irq, unsigned int on)
{
- struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_buslock(irq, &flags);
int ret = 0;
/* wakeup-capable irqs can be shared between drivers that
* don't need to have the same sleep mode behaviors.
*/
- raw_spin_lock_irqsave(&desc->lock, flags);
if (on) {
if (desc->wake_depth++ == 0) {
ret = set_irq_wake_real(irq, on);
if (ret)
desc->wake_depth = 0;
else
- desc->status |= IRQ_WAKEUP;
+ irqd_set(&desc->irq_data, IRQD_WAKEUP_STATE);
}
} else {
if (desc->wake_depth == 0) {
@@ -385,14 +501,13 @@ int set_irq_wake(unsigned int irq, unsigned int on)
if (ret)
desc->wake_depth = 1;
else
- desc->status &= ~IRQ_WAKEUP;
+ irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
}
}
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
+ irq_put_desc_busunlock(desc, flags);
return ret;
}
-EXPORT_SYMBOL(set_irq_wake);
+EXPORT_SYMBOL(irq_set_irq_wake);
/*
* Internal function that tells the architecture code whether a
@@ -401,43 +516,27 @@ EXPORT_SYMBOL(set_irq_wake);
*/
int can_request_irq(unsigned int irq, unsigned long irqflags)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irqaction *action;
unsigned long flags;
+ struct irq_desc *desc = irq_get_desc_lock(irq, &flags);
+ int canrequest = 0;
if (!desc)
return 0;
- if (desc->status & IRQ_NOREQUEST)
- return 0;
-
- raw_spin_lock_irqsave(&desc->lock, flags);
- action = desc->action;
- if (action)
- if (irqflags & action->flags & IRQF_SHARED)
- action = NULL;
-
- raw_spin_unlock_irqrestore(&desc->lock, flags);
-
- return !action;
-}
-
-void compat_irq_chip_set_default_handler(struct irq_desc *desc)
-{
- /*
- * If the architecture still has not overriden
- * the flow handler then zap the default. This
- * should catch incorrect flow-type setting.
- */
- if (desc->handle_irq == &handle_bad_irq)
- desc->handle_irq = NULL;
+ /*
+ * Returns 1 when the irq can be requested, 0 otherwise. As in the
+ * code being replaced (which returned !action), a line with no
+ * action installed is requestable; a line that already has an
+ * action is requestable only if both that action and the caller
+ * set IRQF_SHARED.
+ */
+ if (irq_settings_can_request(desc)) {
+ if (!desc->action ||
+ irqflags & desc->action->flags & IRQF_SHARED)
+ canrequest = 1;
+ }
+ irq_put_desc_unlock(desc, flags);
+ return canrequest;
}
int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags)
{
- int ret;
struct irq_chip *chip = desc->irq_data.chip;
+ int ret, unmask = 0;
if (!chip || !chip->irq_set_type) {
/*
@@ -449,23 +548,43 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
return 0;
}
+ flags &= IRQ_TYPE_SENSE_MASK;
+
+ if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
+ if (!(desc->istate & IRQS_MASKED))
+ mask_irq(desc);
+ if (!(desc->istate & IRQS_DISABLED))
+ unmask = 1;
+ }
+
/* caller masked out all except trigger mode flags */
ret = chip->irq_set_type(&desc->irq_data, flags);
- if (ret)
- pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
- flags, irq, chip->irq_set_type);
- else {
- if (flags & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
- flags |= IRQ_LEVEL;
- /* note that IRQF_TRIGGER_MASK == IRQ_TYPE_SENSE_MASK */
- desc->status &= ~(IRQ_LEVEL | IRQ_TYPE_SENSE_MASK);
- desc->status |= flags;
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
+ irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK);
+ irqd_set(&desc->irq_data, flags);
+
+ case IRQ_SET_MASK_OK_NOCOPY:
+ flags = irqd_get_trigger_type(&desc->irq_data);
+ irq_settings_set_trigger_mask(desc, flags);
+ irqd_clear(&desc->irq_data, IRQD_LEVEL);
+ irq_settings_clr_level(desc);
+ if (flags & IRQ_TYPE_LEVEL_MASK) {
+ irq_settings_set_level(desc);
+ irqd_set(&desc->irq_data, IRQD_LEVEL);
+ }
if (chip != desc->irq_data.chip)
irq_chip_set_defaults(desc->irq_data.chip);
+ ret = 0;
+ break;
+ default:
+ pr_err("setting trigger mode %lu for irq %u failed (%pF)\n",
+ flags, irq, chip->irq_set_type);
}
-
+ if (unmask)
+ unmask_irq(desc);
return ret;
}
@@ -509,8 +628,11 @@ static int irq_wait_for_interrupt(struct irqaction *action)
* handler finished. unmask if the interrupt has not been disabled and
* is marked MASKED.
*/
-static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
+static void irq_finalize_oneshot(struct irq_desc *desc,
+ struct irqaction *action, bool force)
{
+ if (!(desc->istate & IRQS_ONESHOT))
+ return;
again:
chip_bus_lock(desc);
raw_spin_lock_irq(&desc->lock);
@@ -522,26 +644,44 @@ again:
* The thread is faster done than the hard interrupt handler
* on the other CPU. If we unmask the irq line then the
* interrupt can come in again and masks the line, leaves due
- * to IRQ_INPROGRESS and the irq line is masked forever.
+ * to IRQS_INPROGRESS and the irq line is masked forever.
+ *
+ * This also serializes the state of shared oneshot handlers
+ * versus "desc->threads_oneshot |= action->thread_mask;" in
+ * irq_wake_thread(). See the comment there which explains the
+ * serialization.
*/
- if (unlikely(desc->status & IRQ_INPROGRESS)) {
+ if (unlikely(desc->istate & IRQS_INPROGRESS)) {
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
cpu_relax();
goto again;
}
- if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
- desc->status &= ~IRQ_MASKED;
+ /*
+ * Now check again, whether the thread should run. Otherwise
+ * we would clear the threads_oneshot bit of this thread which
+ * was just set.
+ */
+ if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ goto out_unlock;
+
+ desc->threads_oneshot &= ~action->thread_mask;
+
+ if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) &&
+ (desc->istate & IRQS_MASKED)) {
+ irq_compat_clr_masked(desc);
+ desc->istate &= ~IRQS_MASKED;
desc->irq_data.chip->irq_unmask(&desc->irq_data);
}
+out_unlock:
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
}
#ifdef CONFIG_SMP
/*
- * Check whether we need to change the affinity of the interrupt thread.
+ * Check whether we need to change the affinity of the interrupt thread.
*/
static void
irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
@@ -573,6 +713,32 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
#endif
/*
+ * Interrupts which are not explicitly requested as threaded
+ * interrupts rely on the implicit bh/preempt disable of the hard irq
+ * context. So we need to disable bh here to avoid deadlocks and other
+ * side effects.
+ */
+static void
+irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
+{
+	irq_handler_t fn;
+
+	/* Run the handler and the oneshot finalization with bh disabled */
+	local_bh_disable();
+	fn = action->thread_fn;
+	fn(action->irq, action->dev_id);
+	irq_finalize_oneshot(desc, action, false);
+	local_bh_enable();
+}
+
+/*
+ * Interrupts explicitly requested as threaded interrupts want to be
+ * preemptible - many of them need to sleep and wait for slow busses to
+ * complete.
+ */
+static void irq_thread_fn(struct irq_desc *desc, struct irqaction *action)
+{
+	irq_handler_t fn = action->thread_fn;
+
+	/* Call the threaded handler, then do the (non-forced) oneshot finalization */
+	fn(action->irq, action->dev_id);
+	irq_finalize_oneshot(desc, action, false);
+}
+
+/*
* Interrupt handler thread
*/
static int irq_thread(void *data)
@@ -582,7 +748,14 @@ static int irq_thread(void *data)
};
struct irqaction *action = data;
struct irq_desc *desc = irq_to_desc(action->irq);
- int wake, oneshot = desc->status & IRQ_ONESHOT;
+ void (*handler_fn)(struct irq_desc *desc, struct irqaction *action);
+ int wake;
+
+ if (force_irqthreads & test_bit(IRQTF_FORCED_THREAD,
+ &action->thread_flags))
+ handler_fn = irq_forced_thread_fn;
+ else
+ handler_fn = irq_thread_fn;
sched_setscheduler(current, SCHED_FIFO, &param);
current->irqaction = action;
@@ -594,23 +767,20 @@ static int irq_thread(void *data)
atomic_inc(&desc->threads_active);
raw_spin_lock_irq(&desc->lock);
- if (unlikely(desc->status & IRQ_DISABLED)) {
+ if (unlikely(desc->istate & IRQS_DISABLED)) {
/*
* CHECKME: We might need a dedicated
* IRQ_THREAD_PENDING flag here, which
* retriggers the thread in check_irq_resend()
- * but AFAICT IRQ_PENDING should be fine as it
+ * but AFAICT IRQS_PENDING should be fine as it
* retriggers the interrupt itself --- tglx
*/
- desc->status |= IRQ_PENDING;
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
raw_spin_unlock_irq(&desc->lock);
} else {
raw_spin_unlock_irq(&desc->lock);
-
- action->thread_fn(action->irq, action->dev_id);
-
- if (oneshot)
- irq_finalize_oneshot(action->irq, desc);
+ handler_fn(desc, action);
}
wake = atomic_dec_and_test(&desc->threads_active);
@@ -619,6 +789,9 @@ static int irq_thread(void *data)
wake_up(&desc->wait_for_threads);
}
+ /* Prevent a stale desc->threads_oneshot */
+ irq_finalize_oneshot(desc, action, true);
+
/*
* Clear irqaction. Otherwise exit_irq_thread() would make
* fuzz about an active irq thread going into nirvana.
@@ -633,6 +806,7 @@ static int irq_thread(void *data)
void exit_irq_thread(void)
{
struct task_struct *tsk = current;
+ struct irq_desc *desc;
if (!tsk->irqaction)
return;
@@ -641,6 +815,14 @@ void exit_irq_thread(void)
"exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+ desc = irq_to_desc(tsk->irqaction->irq);
+
+ /*
+ * Prevent a stale desc->threads_oneshot. Must be called
+ * before setting the IRQTF_DIED flag.
+ */
+ irq_finalize_oneshot(desc, tsk->irqaction, true);
+
/*
* Set the THREAD DIED flag to prevent further wakeups of the
* soon to be gone threaded handler.
@@ -648,6 +830,22 @@ void exit_irq_thread(void)
set_bit(IRQTF_DIED, &tsk->irqaction->flags);
}
+/*
+ * When forced threading is active, convert @new into a oneshot
+ * threaded action. Actions flagged IRQF_NO_THREAD, IRQF_PERCPU or
+ * IRQF_ONESHOT are left untouched.
+ */
+static void irq_setup_forced_threading(struct irqaction *new)
+{
+	if (!force_irqthreads ||
+	    (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT)))
+		return;
+
+	new->flags |= IRQF_ONESHOT;
+
+	/* An action that already has a thread function keeps its handlers */
+	if (new->thread_fn)
+		return;
+
+	/* Move the primary handler into the thread and install the default one */
+	set_bit(IRQTF_FORCED_THREAD, &new->thread_flags);
+	new->thread_fn = new->handler;
+	new->handler = irq_default_primary_handler;
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -657,9 +855,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
{
struct irqaction *old, **old_ptr;
const char *old_name = NULL;
- unsigned long flags;
- int nested, shared = 0;
- int ret;
+ unsigned long flags, thread_mask = 0;
+ int ret, nested, shared = 0;
+ cpumask_var_t mask;
if (!desc)
return -EINVAL;
@@ -683,15 +881,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
rand_initialize_irq(irq);
}
- /* Oneshot interrupts are not allowed with shared */
- if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED))
- return -EINVAL;
-
/*
* Check whether the interrupt nests into another interrupt
* thread.
*/
- nested = desc->status & IRQ_NESTED_THREAD;
+ nested = irq_settings_is_nested_thread(desc);
if (nested) {
if (!new->thread_fn)
return -EINVAL;
@@ -701,6 +895,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* dummy function which warns when called.
*/
new->handler = irq_nested_primary_handler;
+ } else {
+ irq_setup_forced_threading(new);
}
/*
@@ -724,6 +920,11 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->thread = t;
}
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto out_thread;
+ }
+
/*
* The following block of code has to be executed atomically
*/
@@ -735,29 +936,40 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* Can't share interrupts unless both agree to and are
* the same type (level, edge, polarity). So both flag
* fields must have IRQF_SHARED set and the bits which
- * set the trigger type must match.
+ * set the trigger type must match. Also all must
+ * agree on ONESHOT.
*/
if (!((old->flags & new->flags) & IRQF_SHARED) ||
- ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) {
+ ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK) ||
+ ((old->flags ^ new->flags) & IRQF_ONESHOT)) {
old_name = old->name;
goto mismatch;
}
-#if defined(CONFIG_IRQ_PER_CPU)
/* All handlers must agree on per-cpuness */
if ((old->flags & IRQF_PERCPU) !=
(new->flags & IRQF_PERCPU))
goto mismatch;
-#endif
/* add new interrupt at end of irq queue */
do {
+ thread_mask |= old->thread_mask;
old_ptr = &old->next;
old = *old_ptr;
} while (old);
shared = 1;
}
+ /*
+ * Setup the thread mask for this irqaction. Unlikely to have
+ * 32 resp 64 irqs sharing one line, but who knows.
+ */
+ if (new->flags & IRQF_ONESHOT && thread_mask == ~0UL) {
+ ret = -EBUSY;
+ goto out_mask;
+ }
+ new->thread_mask = 1 << ffz(thread_mask);
+
if (!shared) {
irq_chip_set_defaults(desc->irq_data.chip);
@@ -769,42 +981,44 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->flags & IRQF_TRIGGER_MASK);
if (ret)
- goto out_thread;
- } else
- compat_irq_chip_set_default_handler(desc);
-#if defined(CONFIG_IRQ_PER_CPU)
- if (new->flags & IRQF_PERCPU)
- desc->status |= IRQ_PER_CPU;
-#endif
+ goto out_mask;
+ }
- desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT |
- IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
+ desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
+ IRQS_INPROGRESS | IRQS_ONESHOT | \
+ IRQS_WAITING);
+
+ if (new->flags & IRQF_PERCPU) {
+ irqd_set(&desc->irq_data, IRQD_PER_CPU);
+ irq_settings_set_per_cpu(desc);
+ }
if (new->flags & IRQF_ONESHOT)
- desc->status |= IRQ_ONESHOT;
+ desc->istate |= IRQS_ONESHOT;
- if (!(desc->status & IRQ_NOAUTOEN)) {
- desc->depth = 0;
- desc->status &= ~IRQ_DISABLED;
- desc->irq_data.chip->irq_startup(&desc->irq_data);
- } else
+ if (irq_settings_can_autoenable(desc))
+ irq_startup(desc);
+ else
/* Undo nested disables: */
desc->depth = 1;
/* Exclude IRQ from balancing if requested */
- if (new->flags & IRQF_NOBALANCING)
- desc->status |= IRQ_NO_BALANCING;
+ if (new->flags & IRQF_NOBALANCING) {
+ irq_settings_set_no_balancing(desc);
+ irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
+ }
/* Set default affinity mask once everything is setup */
- setup_affinity(irq, desc);
-
- } else if ((new->flags & IRQF_TRIGGER_MASK)
- && (new->flags & IRQF_TRIGGER_MASK)
- != (desc->status & IRQ_TYPE_SENSE_MASK)) {
- /* hope the handler works with the actual trigger mode... */
- pr_warning("IRQ %d uses trigger mode %d; requested %d\n",
- irq, (int)(desc->status & IRQ_TYPE_SENSE_MASK),
- (int)(new->flags & IRQF_TRIGGER_MASK));
+ setup_affinity(irq, desc, mask);
+
+ } else if (new->flags & IRQF_TRIGGER_MASK) {
+ unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
+ unsigned int omsk = irq_settings_get_trigger_mask(desc);
+
+ if (nmsk != omsk)
+ /* hope the handler works with current trigger mode */
+ pr_warning("IRQ %d uses trigger mode %u; requested %u\n",
+ irq, nmsk, omsk);
}
new->irq = irq;
@@ -818,8 +1032,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* Check whether we disabled the irq via the spurious handler
* before. Reenable it and give it another chance.
*/
- if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) {
- desc->status &= ~IRQ_SPURIOUS_DISABLED;
+ if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
+ desc->istate &= ~IRQS_SPURIOUS_DISABLED;
__enable_irq(desc, irq, false);
}
@@ -849,6 +1063,9 @@ mismatch:
#endif
ret = -EBUSY;
+out_mask:
+ free_cpumask_var(mask);
+
out_thread:
raw_spin_unlock_irqrestore(&desc->lock, flags);
if (new->thread) {
@@ -871,9 +1088,14 @@ out_thread:
*/
int setup_irq(unsigned int irq, struct irqaction *act)
{
+ int retval;
struct irq_desc *desc = irq_to_desc(irq);
- return __setup_irq(irq, desc, act);
+ /*
+ * irq_to_desc() can return NULL for an unknown irq. Reject it here:
+ * the bus lock helpers are handed the descriptor (and presumably
+ * dereference it) before __setup_irq() gets to run its own !desc
+ * check, which the removed code relied on.
+ */
+ if (!desc)
+ return -EINVAL;
+
+ chip_bus_lock(desc);
+ retval = __setup_irq(irq, desc, act);
+ chip_bus_sync_unlock(desc);
+
+ return retval;
}
EXPORT_SYMBOL_GPL(setup_irq);
@@ -924,13 +1146,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
/* If this was the last handler, shut down the IRQ line: */
- if (!desc->action) {
- desc->status |= IRQ_DISABLED;
- if (desc->irq_data.chip->irq_shutdown)
- desc->irq_data.chip->irq_shutdown(&desc->irq_data);
- else
- desc->irq_data.chip->irq_disable(&desc->irq_data);
- }
+ if (!desc->action)
+ irq_shutdown(desc);
#ifdef CONFIG_SMP
/* make sure affinity_hint is cleaned up */
@@ -1004,6 +1221,11 @@ void free_irq(unsigned int irq, void *dev_id)
if (!desc)
return;
+#ifdef CONFIG_SMP
+ if (WARN_ON(desc->affinity_notify))
+ desc->affinity_notify = NULL;
+#endif
+
chip_bus_lock(desc);
kfree(__free_irq(irq, dev_id));
chip_bus_sync_unlock(desc);
@@ -1074,7 +1296,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
if (!desc)
return -EINVAL;
- if (desc->status & IRQ_NOREQUEST)
+ if (!irq_settings_can_request(desc))
return -EINVAL;
if (!handler) {
@@ -1100,7 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
if (retval)
kfree(action);
-#ifdef CONFIG_DEBUG_SHIRQ
+#ifdef CONFIG_DEBUG_SHIRQ_FIXME
if (!retval && (irqflags & IRQF_SHARED)) {
/*
* It's a shared IRQ -- the driver ought to be prepared for it
@@ -1149,7 +1371,7 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
if (!desc)
return -EINVAL;
- if (desc->status & IRQ_NESTED_THREAD) {
+ if (irq_settings_is_nested_thread(desc)) {
ret = request_threaded_irq(irq, NULL, handler,
flags, name, dev_id);
return !ret ? IRQC_IS_NESTED : ret;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 441fd629ff04..ec4806d4778b 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -4,23 +4,23 @@
#include "internals.h"
-void move_masked_irq(int irq)
+void irq_move_masked_irq(struct irq_data *idata)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_chip *chip = desc->irq_data.chip;
+ struct irq_desc *desc = irq_data_to_desc(idata);
+ struct irq_chip *chip = idata->chip;
- if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+ if (likely(!irqd_is_setaffinity_pending(&desc->irq_data)))
return;
/*
* Paranoia: cpu-local interrupts shouldn't be calling in here anyway.
*/
- if (CHECK_IRQ_PER_CPU(desc->status)) {
+ if (!irqd_can_balance(&desc->irq_data)) {
WARN_ON(1);
return;
}
- desc->status &= ~IRQ_MOVE_PENDING;
+ irqd_clr_move_pending(&desc->irq_data);
if (unlikely(cpumask_empty(desc->pending_mask)))
return;
@@ -53,15 +53,20 @@ void move_masked_irq(int irq)
cpumask_clear(desc->pending_mask);
}
-void move_native_irq(int irq)
+void move_masked_irq(int irq)
+{
+ irq_move_masked_irq(irq_get_irq_data(irq));
+}
+
+void irq_move_irq(struct irq_data *idata)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_desc *desc = irq_data_to_desc(idata);
bool masked;
- if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+ if (likely(!irqd_is_setaffinity_pending(idata)))
return;
- if (unlikely(desc->status & IRQ_DISABLED))
+ if (unlikely(desc->istate & IRQS_DISABLED))
return;
/*
@@ -69,10 +74,15 @@ void move_native_irq(int irq)
* threaded interrupt with ONESHOT set, we can end up with an
* interrupt storm.
*/
- masked = desc->status & IRQ_MASKED;
+ masked = desc->istate & IRQS_MASKED;
if (!masked)
- desc->irq_data.chip->irq_mask(&desc->irq_data);
- move_masked_irq(irq);
+ idata->chip->irq_mask(idata);
+ irq_move_masked_irq(idata);
if (!masked)
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
+ idata->chip->irq_unmask(idata);
+}
+
+void move_native_irq(int irq)
+{
+ irq_move_irq(irq_get_irq_data(irq));
}
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..f76fc00c9877 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -18,7 +18,7 @@
* During system-wide suspend or hibernation device drivers need to be prevented
* from receiving interrupts and this function is provided for this purpose.
* It marks all interrupt lines in use, except for the timer ones, as disabled
- * and sets the IRQ_SUSPENDED flag for each of them.
+ * and sets the IRQS_SUSPENDED flag for each of them.
*/
void suspend_device_irqs(void)
{
@@ -34,7 +34,7 @@ void suspend_device_irqs(void)
}
for_each_irq_desc(irq, desc)
- if (desc->status & IRQ_SUSPENDED)
+ if (desc->istate & IRQS_SUSPENDED)
synchronize_irq(irq);
}
EXPORT_SYMBOL_GPL(suspend_device_irqs);
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs);
* resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs()
*
* Enable all interrupt lines previously disabled by suspend_device_irqs() that
- * have the IRQ_SUSPENDED flag set.
+ * have the IRQS_SUSPENDED flag set.
*/
void resume_device_irqs(void)
{
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
for_each_irq_desc(irq, desc) {
unsigned long flags;
- if (!(desc->status & IRQ_SUSPENDED))
- continue;
-
raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, true);
raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -71,9 +68,24 @@ int check_wakeup_irqs(void)
struct irq_desc *desc;
int irq;
- for_each_irq_desc(irq, desc)
- if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING))
- return -EBUSY;
+ for_each_irq_desc(irq, desc) {
+ if (irqd_is_wakeup_set(&desc->irq_data)) {
+ if (desc->istate & IRQS_PENDING)
+ return -EBUSY;
+ continue;
+ }
+ /*
+ * Check the non wakeup interrupts whether they need
+ * to be masked before finally going into suspend
+ * state. That's for hardware which has no wakeup
+ * source configuration facility. The chip
+ * implementation indicates that with
+ * IRQCHIP_MASK_ON_SUSPEND.
+ */
+ if (desc->istate & IRQS_SUSPENDED &&
+ irq_desc_get_chip(desc)->flags & IRQCHIP_MASK_ON_SUSPEND)
+ mask_irq(desc);
+ }
return 0;
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c8a2a9f8a7b..4cc2e5ed0bec 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -11,6 +11,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
#include "internals.h"
@@ -24,7 +25,7 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
const struct cpumask *mask = desc->irq_data.affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PENDING)
+ if (irqd_is_setaffinity_pending(&desc->irq_data))
mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
@@ -65,8 +66,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
cpumask_var_t new_value;
int err;
- if (!irq_to_desc(irq)->irq_data.chip->irq_set_affinity || no_irq_affinity ||
- irq_balancing_disabled(irq))
+ if (!irq_can_set_affinity(irq) || no_irq_affinity)
return -EIO;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
@@ -89,7 +89,7 @@ static ssize_t irq_affinity_proc_write(struct file *file,
if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- err = irq_select_affinity_usr(irq) ? -EINVAL : count;
+ err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
} else {
irq_set_affinity(irq, new_value);
err = count;
@@ -357,3 +357,65 @@ void init_irq_proc(void)
}
}
+#ifdef CONFIG_GENERIC_IRQ_SHOW
+
+int __weak arch_show_interrupts(struct seq_file *p, int prec)
+{
+ return 0;
+}
+
+/*
+ * show_interrupts - print one row of the interrupt table into a seq_file
+ * @p: seq_file to print into
+ * @v: points at the loff_t position, which is used as the irq number
+ *
+ * Position 0 additionally prints the per-CPU column header and computes
+ * the width of the irq number column. Position nr_irqs hands over to
+ * arch_show_interrupts() and returns its result. Irqs without a
+ * descriptor, or with neither an action nor a non-zero count on any
+ * online CPU, produce no row. Returns 0 in all other cases.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+	static int prec;
+
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	if (i == nr_irqs)
+		return arch_show_interrupts(p, prec);
+
+	/* print header and calculate the width of the first column */
+	if (i == 0) {
+		for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
+			j *= 10;
+
+		seq_printf(p, "%*s", prec + 8, "");
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%-8d", j);
+		seq_putc(p, '\n');
+	}
+
+	desc = irq_to_desc(i);
+	if (!desc)
+		return 0;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	/* OR the per-CPU counts together: only "any non-zero" matters here */
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%*d: ", prec, i);
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+	seq_printf(p, " %8s", desc->irq_data.chip->name);
+	/*
+	 * NOTE(review): desc->name is presumably NULL when no name was
+	 * given for the flow handler; skip the suffix then instead of
+	 * printing a bogus "-(null)" column.
+	 */
+	if (desc->name)
+		seq_printf(p, "-%-8s", desc->name);
+
+	/* List the names of all actions chained on this line */
+	if (action) {
+		seq_printf(p, " %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
+	}
+
+	seq_putc(p, '\n');
+out:
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+#endif
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 891115a929aa..ad683a99b1ec 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -23,7 +23,7 @@
#ifdef CONFIG_HARDIRQS_SW_RESEND
/* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, NR_IRQS);
+static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
/*
* Run software resends of IRQ's
@@ -55,20 +55,19 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq)
{
- unsigned int status = desc->status;
-
- /*
- * Make sure the interrupt is enabled, before resending it:
- */
- desc->irq_data.chip->irq_enable(&desc->irq_data);
-
/*
* We do not resend level type interrupts. Level type
* interrupts are resent by hardware when they are still
* active.
*/
- if ((status & (IRQ_LEVEL | IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
+ if (irq_settings_is_level(desc))
+ return;
+ if (desc->istate & IRQS_REPLAY)
+ return;
+ if (desc->istate & IRQS_PENDING) {
+ irq_compat_clr_pending(desc);
+ desc->istate &= ~IRQS_PENDING;
+ desc->istate |= IRQS_REPLAY;
if (!desc->irq_data.chip->irq_retrigger ||
!desc->irq_data.chip->irq_retrigger(&desc->irq_data)) {
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
new file mode 100644
index 000000000000..0227ad358272
--- /dev/null
+++ b/kernel/irq/settings.h
@@ -0,0 +1,138 @@
+/*
+ * Internal header to deal with irq_desc->status which will be renamed
+ * to irq_desc->settings.
+ */
+enum {
+ _IRQ_DEFAULT_INIT_FLAGS = IRQ_DEFAULT_INIT_FLAGS,
+ _IRQ_PER_CPU = IRQ_PER_CPU,
+ _IRQ_LEVEL = IRQ_LEVEL,
+ _IRQ_NOPROBE = IRQ_NOPROBE,
+ _IRQ_NOREQUEST = IRQ_NOREQUEST,
+ _IRQ_NOAUTOEN = IRQ_NOAUTOEN,
+ _IRQ_MOVE_PCNTXT = IRQ_MOVE_PCNTXT,
+ _IRQ_NO_BALANCING = IRQ_NO_BALANCING,
+ _IRQ_NESTED_THREAD = IRQ_NESTED_THREAD,
+ _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
+};
+
+#define IRQ_INPROGRESS GOT_YOU_MORON
+#define IRQ_REPLAY GOT_YOU_MORON
+#define IRQ_WAITING GOT_YOU_MORON
+#define IRQ_DISABLED GOT_YOU_MORON
+#define IRQ_PENDING GOT_YOU_MORON
+#define IRQ_MASKED GOT_YOU_MORON
+#define IRQ_WAKEUP GOT_YOU_MORON
+#define IRQ_MOVE_PENDING GOT_YOU_MORON
+#define IRQ_PER_CPU GOT_YOU_MORON
+#define IRQ_NO_BALANCING GOT_YOU_MORON
+#define IRQ_AFFINITY_SET GOT_YOU_MORON
+#define IRQ_LEVEL GOT_YOU_MORON
+#define IRQ_NOPROBE GOT_YOU_MORON
+#define IRQ_NOREQUEST GOT_YOU_MORON
+#define IRQ_NOAUTOEN GOT_YOU_MORON
+#define IRQ_NESTED_THREAD GOT_YOU_MORON
+#undef IRQF_MODIFY_MASK
+#define IRQF_MODIFY_MASK GOT_YOU_MORON
+
+static inline void
+irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
+{
+ desc->status &= ~(clr & _IRQF_MODIFY_MASK);
+ desc->status |= (set & _IRQF_MODIFY_MASK);
+}
+
+static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_PER_CPU;
+}
+
+static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_PER_CPU;
+}
+
+static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NO_BALANCING;
+}
+
+static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_NO_BALANCING;
+}
+
+static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
+{
+ return desc->status & IRQ_TYPE_SENSE_MASK;
+}
+
+static inline void
+irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
+{
+ desc->status &= ~IRQ_TYPE_SENSE_MASK;
+ desc->status |= mask & IRQ_TYPE_SENSE_MASK;
+}
+
+static inline bool irq_settings_is_level(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_LEVEL;
+}
+
+static inline void irq_settings_clr_level(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_LEVEL;
+}
+
+static inline void irq_settings_set_level(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_LEVEL;
+}
+
+static inline bool irq_settings_can_request(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOREQUEST);
+}
+
+static inline void irq_settings_clr_norequest(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_NOREQUEST;
+}
+
+static inline void irq_settings_set_norequest(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NOREQUEST;
+}
+
+static inline bool irq_settings_can_probe(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOPROBE);
+}
+
+static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
+{
+ desc->status &= ~_IRQ_NOPROBE;
+}
+
+static inline void irq_settings_set_noprobe(struct irq_desc *desc)
+{
+ desc->status |= _IRQ_NOPROBE;
+}
+
+static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_MOVE_PCNTXT;
+}
+
+static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
+{
+ return !(desc->status & _IRQ_NOAUTOEN);
+}
+
+static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
+{
+ return desc->status & _IRQ_NESTED_THREAD;
+}
+
+/* Nothing should touch desc->status from now on */
+#undef status
+#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 3089d3b9d5f3..dd586ebf9c8c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -21,70 +21,94 @@ static int irqfixup __read_mostly;
#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(unsigned long dummy);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs, 0, 0);
+static int irq_poll_cpu;
+static atomic_t irq_poll_active;
+
+/*
+ * We wait here for a poller to finish.
+ *
+ * If the poll runs on this CPU, then we yell loudly and return
+ * false. That will leave the interrupt line disabled in the worst
+ * case, but it should never happen.
+ *
+ * We wait until the poller is done and then recheck disabled and
+ * action (about to be disabled). Only if it's still active, we return
+ * true and let the handler run.
+ */
+bool irq_wait_for_poll(struct irq_desc *desc)
+{
+ if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
+ "irq poll in progress on cpu %d for irq %d\n",
+ smp_processor_id(), desc->irq_data.irq))
+ return false;
+
+#ifdef CONFIG_SMP
+ do {
+ raw_spin_unlock(&desc->lock);
+ while (desc->istate & IRQS_INPROGRESS)
+ cpu_relax();
+ raw_spin_lock(&desc->lock);
+ } while (desc->istate & IRQS_INPROGRESS);
+ /* Might have been disabled in meantime */
+ return !(desc->istate & IRQS_DISABLED) && desc->action;
+#else
+ return false;
+#endif
+}
+
/*
* Recovery handler for misrouted interrupts.
*/
-static int try_one_irq(int irq, struct irq_desc *desc)
+static int try_one_irq(int irq, struct irq_desc *desc, bool force)
{
+ irqreturn_t ret = IRQ_NONE;
struct irqaction *action;
- int ok = 0, work = 0;
raw_spin_lock(&desc->lock);
- /* Already running on another processor */
- if (desc->status & IRQ_INPROGRESS) {
- /*
- * Already running: If it is shared get the other
- * CPU to go looking for our mystery interrupt too
- */
- if (desc->action && (desc->action->flags & IRQF_SHARED))
- desc->status |= IRQ_PENDING;
- raw_spin_unlock(&desc->lock);
- return ok;
- }
- /* Honour the normal IRQ locking */
- desc->status |= IRQ_INPROGRESS;
- action = desc->action;
- raw_spin_unlock(&desc->lock);
- while (action) {
- /* Only shared IRQ handlers are safe to call */
- if (action->flags & IRQF_SHARED) {
- if (action->handler(irq, action->dev_id) ==
- IRQ_HANDLED)
- ok = 1;
- }
- action = action->next;
- }
- local_irq_disable();
- /* Now clean up the flags */
- raw_spin_lock(&desc->lock);
- action = desc->action;
+ /* PER_CPU and nested thread interrupts are never polled */
+ if (irq_settings_is_per_cpu(desc) || irq_settings_is_nested_thread(desc))
+ goto out;
/*
- * While we were looking for a fixup someone queued a real
- * IRQ clashing with our walk:
+ * Do not poll disabled interrupts unless the spurious
+ * disabled poller asks explicitly.
*/
- while ((desc->status & IRQ_PENDING) && action) {
+ if ((desc->istate & IRQS_DISABLED) && !force)
+ goto out;
+
+ /*
+ * All handlers must agree on IRQF_SHARED, so we test just the
+ * first. Check for action->next as well.
+ */
+ action = desc->action;
+ if (!action || !(action->flags & IRQF_SHARED) ||
+ (action->flags & __IRQF_TIMER) || !action->next)
+ goto out;
+
+ /* Already running on another processor */
+ if (desc->istate & IRQS_INPROGRESS) {
/*
- * Perform real IRQ processing for the IRQ we deferred
+ * Already running: If it is shared get the other
+ * CPU to go looking for our mystery interrupt too
*/
- work = 1;
- raw_spin_unlock(&desc->lock);
- handle_IRQ_event(irq, action);
- raw_spin_lock(&desc->lock);
- desc->status &= ~IRQ_PENDING;
+ irq_compat_set_pending(desc);
+ desc->istate |= IRQS_PENDING;
+ goto out;
}
- desc->status &= ~IRQ_INPROGRESS;
- /*
- * If we did actual work for the real IRQ line we must let the
- * IRQ controller clean up too
- */
- if (work)
- irq_end(irq, desc);
- raw_spin_unlock(&desc->lock);
- return ok;
+ /* Mark it poll in progress */
+ desc->istate |= IRQS_POLL_INPROGRESS;
+ do {
+ if (handle_irq_event(desc) == IRQ_HANDLED)
+ ret = IRQ_HANDLED;
+ action = desc->action;
+ } while ((desc->istate & IRQS_PENDING) && action);
+ desc->istate &= ~IRQS_POLL_INPROGRESS;
+out:
+ raw_spin_unlock(&desc->lock);
+ return ret == IRQ_HANDLED;
}
static int misrouted_irq(int irq)
@@ -92,6 +116,11 @@ static int misrouted_irq(int irq)
struct irq_desc *desc;
int i, ok = 0;
+ /*
+ * Only one poller may run at a time: proceed only when our
+ * increment took the counter from 0 to 1, otherwise back off.
+ * This is the same test poll_spurious_irqs() uses.
+ */
+ if (atomic_inc_return(&irq_poll_active) != 1)
+ goto out;
+
+ irq_poll_cpu = smp_processor_id();
+
for_each_irq_desc(i, desc) {
if (!i)
continue;
@@ -99,9 +128,11 @@ static int misrouted_irq(int irq)
if (i == irq) /* Already tried */
continue;
- if (try_one_irq(i, desc))
+ if (try_one_irq(i, desc, false))
ok = 1;
}
+out:
+ atomic_dec(&irq_poll_active);
/* So the caller can adjust the irq error counts */
return ok;
}
@@ -111,23 +142,28 @@ static void poll_spurious_irqs(unsigned long dummy)
struct irq_desc *desc;
int i;
+ if (atomic_inc_return(&irq_poll_active) != 1)
+ goto out;
+ irq_poll_cpu = smp_processor_id();
+
for_each_irq_desc(i, desc) {
- unsigned int status;
+ unsigned int state;
if (!i)
continue;
/* Racy but it doesn't matter */
- status = desc->status;
+ state = desc->istate;
barrier();
- if (!(status & IRQ_SPURIOUS_DISABLED))
+ if (!(state & IRQS_SPURIOUS_DISABLED))
continue;
local_irq_disable();
- try_one_irq(i, desc);
+ try_one_irq(i, desc, true);
local_irq_enable();
}
-
+out:
+ atomic_dec(&irq_poll_active);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
@@ -139,15 +175,13 @@ static void poll_spurious_irqs(unsigned long dummy)
*
* (The other 100-of-100,000 interrupts may have been a correctly
* functioning device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
*/
-
static void
__report_bad_irq(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
struct irqaction *action;
+ unsigned long flags;
if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
printk(KERN_ERR "irq event %d: bogus return value %x\n",
@@ -159,6 +193,13 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
dump_stack();
printk(KERN_ERR "handlers:\n");
+ /*
+ * We need to take desc->lock here. note_interrupt() is called
+ * w/o desc->lock held, but IRQS_INPROGRESS set. We might race
+ * with something else removing an action. It's ok to take
+ * desc->lock here. See synchronize_irq().
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
while (action) {
printk(KERN_ERR "[<%p>]", action->handler);
@@ -167,6 +208,7 @@ __report_bad_irq(unsigned int irq, struct irq_desc *desc,
printk("\n");
action = action->next;
}
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
}
static void
@@ -218,6 +260,9 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
void note_interrupt(unsigned int irq, struct irq_desc *desc,
irqreturn_t action_ret)
{
+ if (desc->istate & IRQS_POLL_INPROGRESS)
+ return;
+
if (unlikely(action_ret != IRQ_HANDLED)) {
/*
* If we are seeing only the odd spurious IRQ caused by
@@ -254,9 +299,9 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
* Now kill the IRQ
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
- desc->status |= IRQ_DISABLED | IRQ_SPURIOUS_DISABLED;
+ desc->istate |= IRQS_SPURIOUS_DISABLED;
desc->depth++;
- desc->irq_data.chip->irq_disable(&desc->irq_data);
+ irq_disable(desc);
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..ed253aa24ba4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,13 +38,96 @@
#include <asm/irq_regs.h>
+struct remote_function_call {
+ struct task_struct *p;
+ int (*func)(void *info);
+ void *info;
+ int ret;
+};
+
+static void remote_function(void *data)
+{
+ struct remote_function_call *tfc = data;
+ struct task_struct *p = tfc->p;
+
+ if (p) {
+ tfc->ret = -EAGAIN;
+ if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+ return;
+ }
+
+ tfc->ret = tfc->func(tfc->info);
+}
+
+/**
+ * task_function_call - call a function on the cpu on which a task runs
+ * @p: the task to evaluate
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ *
+ * returns: @func return value, or
+ * -ESRCH - when the process isn't running
+ * -EAGAIN - when the process moved away
+ */
+static int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+{
+ struct remote_function_call data = {
+ .p = p,
+ .func = func,
+ .info = info,
+ .ret = -ESRCH, /* No such (running) process */
+ };
+
+ if (task_curr(p))
+ smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+
+ return data.ret;
+}
+
+/**
+ * cpu_function_call - call a function on the cpu
+ * @cpu: the cpu on which to call the function
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func on the remote cpu.
+ *
+ * returns: @func return value or -ENXIO when the cpu is offline
+ */
+static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+{
+ struct remote_function_call data = {
+ .p = NULL,
+ .func = func,
+ .info = info,
+ .ret = -ENXIO, /* No such CPU */
+ };
+
+ smp_call_function_single(cpu, remote_function, &data, 1);
+
+ return data.ret;
+}
+
+#define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
+ PERF_FLAG_FD_OUTPUT |\
+ PERF_FLAG_PID_CGROUP)
+
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};
-atomic_t perf_task_events __read_mostly;
+/*
+ * perf_sched_events : >0 events exist
+ * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
+ */
+atomic_t perf_sched_events __read_mostly;
+static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
@@ -67,7 +150,24 @@ int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
/*
* max perf event sample rate
*/
-int sysctl_perf_event_sample_rate __read_mostly = 100000;
+#define DEFAULT_MAX_SAMPLE_RATE 100000
+int sysctl_perf_event_sample_rate __read_mostly = DEFAULT_MAX_SAMPLE_RATE;
+static int max_samples_per_tick __read_mostly =
+ DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
+
+int perf_proc_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (ret || !write)
+ return ret;
+
+ max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
+
+ return 0;
+}
static atomic64_t perf_event_id;
@@ -75,7 +175,11 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
enum event_type_t event_type);
static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type);
+ enum event_type_t event_type,
+ struct task_struct *task);
+
+static void update_context_time(struct perf_event_context *ctx);
+static u64 perf_event_time(struct perf_event *event);
void __weak perf_event_print_debug(void) { }
@@ -89,6 +193,360 @@ static inline u64 perf_clock(void)
return local_clock();
}
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
+#ifdef CONFIG_CGROUP_PERF
+
+/*
+ * Must ensure cgroup is pinned (css_get) before calling
+ * this function. In other words, we cannot call this function
+ * if there is no cgroup event for the current CPU context.
+ */
+static inline struct perf_cgroup *
+perf_cgroup_from_task(struct task_struct *task)
+{
+ return container_of(task_subsys_state(task, perf_subsys_id),
+ struct perf_cgroup, css);
+}
+
+static inline bool
+perf_cgroup_match(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+
+ return !event->cgrp || event->cgrp == cpuctx->cgrp;
+}
+
+static inline void perf_get_cgroup(struct perf_event *event)
+{
+ css_get(&event->cgrp->css);
+}
+
+static inline void perf_put_cgroup(struct perf_event *event)
+{
+ css_put(&event->cgrp->css);
+}
+
+static inline void perf_detach_cgroup(struct perf_event *event)
+{
+ perf_put_cgroup(event);
+ event->cgrp = NULL;
+}
+
+static inline int is_cgroup_event(struct perf_event *event)
+{
+ return event->cgrp != NULL;
+}
+
+static inline u64 perf_cgroup_event_time(struct perf_event *event)
+{
+ struct perf_cgroup_info *t;
+
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ return t->time;
+}
+
+static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
+{
+ struct perf_cgroup_info *info;
+ u64 now;
+
+ now = perf_clock();
+
+ info = this_cpu_ptr(cgrp->info);
+
+ info->time += now - info->timestamp;
+ info->timestamp = now;
+}
+
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+{
+ struct perf_cgroup *cgrp_out = cpuctx->cgrp;
+ if (cgrp_out)
+ __update_cgrp_time(cgrp_out);
+}
+
+static inline void update_cgrp_time_from_event(struct perf_event *event)
+{
+ struct perf_cgroup *cgrp;
+
+ /*
+ * ensure we access cgroup data only when needed and
+ * when we know the cgroup is pinned (css_get)
+ */
+ if (!is_cgroup_event(event))
+ return;
+
+ cgrp = perf_cgroup_from_task(current);
+ /*
+ * Do not update time when cgroup is not active
+ */
+ if (cgrp == event->cgrp)
+ __update_cgrp_time(event->cgrp);
+}
+
+static inline void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx)
+{
+ struct perf_cgroup *cgrp;
+ struct perf_cgroup_info *info;
+
+ /*
+ * ctx->lock held by caller
+ * ensure we do not access cgroup data
+ * unless we have the cgroup pinned (css_get)
+ */
+ if (!task || !ctx->nr_cgroups)
+ return;
+
+ cgrp = perf_cgroup_from_task(task);
+ info = this_cpu_ptr(cgrp->info);
+ info->timestamp = ctx->timestamp;
+}
+
+#define PERF_CGROUP_SWOUT 0x1 /* cgroup switch out every event */
+#define PERF_CGROUP_SWIN 0x2 /* cgroup switch in events based on task */
+
+/*
+ * reschedule events based on the cgroup constraint of task.
+ *
+ * mode SWOUT : schedule out everything
+ * mode SWIN : schedule in based on cgroup for next
+ */
+void perf_cgroup_switch(struct task_struct *task, int mode)
+{
+ struct perf_cpu_context *cpuctx;
+ struct pmu *pmu;
+ unsigned long flags;
+
+ /*
+ * disable interrupts to avoid getting nr_cgroup
+ * changes via __perf_event_disable(). Also
+ * avoids preemption.
+ */
+ local_irq_save(flags);
+
+ /*
+ * we reschedule only in the presence of cgroup
+ * constrained events.
+ */
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ /*
+ * perf_cgroup_events says at least one
+ * context on this CPU has cgroup events.
+ *
+ * ctx->nr_cgroups reports the number of cgroup
+ * events for a context.
+ */
+ if (cpuctx->ctx.nr_cgroups > 0) {
+
+ if (mode & PERF_CGROUP_SWOUT) {
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+ /*
+ * must not be done before ctxswout due
+ * to event_filter_match() in event_sched_out()
+ */
+ cpuctx->cgrp = NULL;
+ }
+
+ if (mode & PERF_CGROUP_SWIN) {
+ /* set cgrp before ctxsw in to
+ * allow event_filter_match() to not
+ * have to pass task around
+ */
+ cpuctx->cgrp = perf_cgroup_from_task(task);
+ cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
+ }
+ }
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ }
+
+ rcu_read_unlock();
+
+ local_irq_restore(flags);
+}
+
+static inline void perf_cgroup_sched_out(struct task_struct *task)
+{
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+}
+
+static inline void perf_cgroup_sched_in(struct task_struct *task)
+{
+ perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+}
+
+static inline int perf_cgroup_connect(int fd, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader)
+{
+ struct perf_cgroup *cgrp;
+ struct cgroup_subsys_state *css;
+ struct file *file;
+ int ret = 0, fput_needed;
+
+ file = fget_light(fd, &fput_needed);
+ if (!file)
+ return -EBADF;
+
+ css = cgroup_css_from_dir(file, perf_subsys_id);
+ if (IS_ERR(css)) {
+ ret = PTR_ERR(css);
+ goto out;
+ }
+
+ cgrp = container_of(css, struct perf_cgroup, css);
+ event->cgrp = cgrp;
+
+ /* must be done before we fput() the file */
+ perf_get_cgroup(event);
+
+ /*
+ * all events in a group must monitor
+ * the same cgroup because a task belongs
+ * to only one perf cgroup at a time
+ */
+ if (group_leader && group_leader->cgrp != cgrp) {
+ perf_detach_cgroup(event);
+ ret = -EINVAL;
+ }
+out:
+ fput_light(file, fput_needed);
+ return ret;
+}
+
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+{
+ struct perf_cgroup_info *t;
+ t = per_cpu_ptr(event->cgrp->info, event->cpu);
+ event->shadow_ctx_time = now - t->timestamp;
+}
+
+static inline void
+perf_cgroup_defer_enabled(struct perf_event *event)
+{
+ /*
+ * when the current task's perf cgroup does not match
+ * the event's, we need to remember to call the
+ * perf_cgroup_mark_enabled() function the first time a task with
+ * a matching perf cgroup is scheduled in.
+ */
+ if (is_cgroup_event(event) && !perf_cgroup_match(event))
+ event->cgrp_defer_enabled = 1;
+}
+
+static inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *sub;
+ u64 tstamp = perf_event_time(event);
+
+ if (!event->cgrp_defer_enabled)
+ return;
+
+ event->cgrp_defer_enabled = 0;
+
+ event->tstamp_enabled = tstamp - event->total_time_enabled;
+ list_for_each_entry(sub, &event->sibling_list, group_entry) {
+ if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
+ sub->tstamp_enabled = tstamp - sub->total_time_enabled;
+ sub->cgrp_defer_enabled = 0;
+ }
+ }
+}
+#else /* !CONFIG_CGROUP_PERF */
+
+static inline bool
+perf_cgroup_match(struct perf_event *event)
+{
+ return true;
+}
+
+static inline void perf_detach_cgroup(struct perf_event *event)
+{}
+
+static inline int is_cgroup_event(struct perf_event *event)
+{
+ return 0;
+}
+
+static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
+{
+ return 0;
+}
+
+static inline void update_cgrp_time_from_event(struct perf_event *event)
+{
+}
+
+static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
+{
+}
+
+static inline void perf_cgroup_sched_out(struct task_struct *task)
+{
+}
+
+static inline void perf_cgroup_sched_in(struct task_struct *task)
+{
+}
+
+static inline int perf_cgroup_connect(pid_t pid, struct perf_event *event,
+ struct perf_event_attr *attr,
+ struct perf_event *group_leader)
+{
+ return -EINVAL;
+}
+
+static inline void
+perf_cgroup_set_timestamp(struct task_struct *task,
+ struct perf_event_context *ctx)
+{
+}
+
+void
+perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+{
+}
+
+static inline void
+perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+{
+}
+
+static inline u64 perf_cgroup_event_time(struct perf_event *event)
+{
+ return 0;
+}
+
+static inline void
+perf_cgroup_defer_enabled(struct perf_event *event)
+{
+}
+
+static inline void
+perf_cgroup_mark_enabled(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+}
+#endif
+
void perf_pmu_disable(struct pmu *pmu)
{
int *count = this_cpu_ptr(pmu->pmu_disable_count);
@@ -254,7 +712,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
raw_spin_lock_irqsave(&ctx->lock, flags);
--ctx->pin_count;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- put_ctx(ctx);
}
/*
@@ -271,6 +728,10 @@ static void update_context_time(struct perf_event_context *ctx)
static u64 perf_event_time(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
+
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time(event);
+
return ctx ? ctx->time : 0;
}
@@ -285,9 +746,20 @@ static void update_event_times(struct perf_event *event)
if (event->state < PERF_EVENT_STATE_INACTIVE ||
event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
return;
-
- if (ctx->is_active)
+ /*
+ * in cgroup mode, time_enabled represents
+ * the time the event was enabled AND active
+ * tasks were in the monitored cgroup. This is
+ * independent of the activity of the context as
+ * there may be a mix of cgroup and non-cgroup events.
+ *
+ * That is why we treat cgroup events differently
+ * here.
+ */
+ if (is_cgroup_event(event))
run_end = perf_event_time(event);
+ else if (ctx->is_active)
+ run_end = ctx->time;
else
run_end = event->tstamp_stopped;
@@ -299,6 +771,7 @@ static void update_event_times(struct perf_event *event)
run_end = perf_event_time(event);
event->total_time_running = run_end - event->tstamp_running;
+
}
/*
@@ -347,6 +820,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
list_add_tail(&event->group_entry, list);
}
+ if (is_cgroup_event(event))
+ ctx->nr_cgroups++;
+
list_add_rcu(&event->event_entry, &ctx->event_list);
if (!ctx->nr_events)
perf_pmu_rotate_start(ctx->pmu);
@@ -473,6 +949,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->attach_state &= ~PERF_ATTACH_CONTEXT;
+ if (is_cgroup_event(event))
+ ctx->nr_cgroups--;
+
ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
@@ -544,7 +1023,8 @@ out:
static inline int
event_filter_match(struct perf_event *event)
{
- return event->cpu == -1 || event->cpu == smp_processor_id();
+ return (event->cpu == -1 || event->cpu == smp_processor_id())
+ && perf_cgroup_match(event);
}
static void
@@ -562,7 +1042,7 @@ event_sched_out(struct perf_event *event,
*/
if (event->state == PERF_EVENT_STATE_INACTIVE
&& !event_filter_match(event)) {
- delta = ctx->time - event->tstamp_stopped;
+ delta = tstamp - event->tstamp_stopped;
event->tstamp_running += delta;
event->tstamp_stopped = tstamp;
}
@@ -606,47 +1086,30 @@ group_sched_out(struct perf_event *group_event,
cpuctx->exclusive = 0;
}
-static inline struct perf_cpu_context *
-__get_cpu_context(struct perf_event_context *ctx)
-{
- return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
-}
-
/*
* Cross CPU call to remove a performance event
*
* We disable the event on the hardware level first. After that we
* remove it from the context list.
*/
-static void __perf_event_remove_from_context(void *info)
+static int __perf_remove_from_context(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
- /*
- * If this is a task context, we need to check whether it is
- * the current task context of this cpu. If not it has been
- * scheduled out before the smp call arrived.
- */
- if (ctx->task && cpuctx->task_ctx != ctx)
- return;
-
raw_spin_lock(&ctx->lock);
-
event_sched_out(event, cpuctx, ctx);
-
list_del_event(event, ctx);
-
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
* Remove the event from a task's (or a CPU's) list of events.
*
- * Must be called with ctx->mutex held.
- *
* CPU events are removed with a smp call. For task events we only
* call when the task is on a CPU.
*
@@ -657,49 +1120,48 @@ static void __perf_event_remove_from_context(void *info)
* When called from perf_event_exit_task, it's OK because the
* context has been detached from its task.
*/
-static void perf_event_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
+ lockdep_assert_held(&ctx->mutex);
+
if (!task) {
/*
* Per cpu events are removed via an smp call and
* the removal is always successful.
*/
- smp_call_function_single(event->cpu,
- __perf_event_remove_from_context,
- event, 1);
+ cpu_function_call(event->cpu, __perf_remove_from_context, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_event_remove_from_context,
- event);
+ if (!task_function_call(task, __perf_remove_from_context, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
- * If the context is active we need to retry the smp call.
+ * If we failed to find a running task, but find the context active now
+ * that we've acquired the ctx->lock, retry.
*/
- if (ctx->nr_active && !list_empty(&event->group_entry)) {
+ if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
/*
- * The lock prevents that this context is scheduled in so we
- * can remove the event safely, if the call above did not
- * succeed.
+ * Since the task isn't running, it's safe to remove the event, us
+ * holding the ctx->lock ensures the task won't get scheduled in.
*/
- if (!list_empty(&event->group_entry))
- list_del_event(event, ctx);
+ list_del_event(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
}
/*
* Cross CPU call to disable a performance event
*/
-static void __perf_event_disable(void *info)
+static int __perf_event_disable(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -708,9 +1170,12 @@ static void __perf_event_disable(void *info)
/*
* If this is a per-task event, need to check whether this
* event's task is the current task on this cpu.
+ *
+ * Can trigger due to concurrent perf_event_context_sched_out()
+ * flipping contexts around.
*/
if (ctx->task && cpuctx->task_ctx != ctx)
- return;
+ return -EINVAL;
raw_spin_lock(&ctx->lock);
@@ -720,6 +1185,7 @@ static void __perf_event_disable(void *info)
*/
if (event->state >= PERF_EVENT_STATE_INACTIVE) {
update_context_time(ctx);
+ update_cgrp_time_from_event(event);
update_group_times(event);
if (event == event->group_leader)
group_sched_out(event, cpuctx, ctx);
@@ -729,6 +1195,8 @@ static void __perf_event_disable(void *info)
}
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -753,13 +1221,13 @@ void perf_event_disable(struct perf_event *event)
/*
* Disable the event on the cpu that it's on
*/
- smp_call_function_single(event->cpu, __perf_event_disable,
- event, 1);
+ cpu_function_call(event->cpu, __perf_event_disable, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_event_disable, event);
+ if (!task_function_call(task, __perf_event_disable, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
@@ -767,6 +1235,11 @@ retry:
*/
if (event->state == PERF_EVENT_STATE_ACTIVE) {
raw_spin_unlock_irq(&ctx->lock);
+ /*
+ * Reload the task pointer, it might have been changed by
+ * a concurrent perf_event_context_sched_out().
+ */
+ task = ctx->task;
goto retry;
}
@@ -778,10 +1251,48 @@ retry:
update_group_times(event);
event->state = PERF_EVENT_STATE_OFF;
}
-
raw_spin_unlock_irq(&ctx->lock);
}
+static void perf_set_shadow_time(struct perf_event *event,
+ struct perf_event_context *ctx,
+ u64 tstamp)
+{
+ /*
+ * use the correct time source for the time snapshot
+ *
+ * We could get by without this by leveraging the
+ * fact that to get to this function, the caller
+ * has most likely already called update_context_time()
+ * and update_cgrp_time_xx() and thus both timestamps
+ * are identical (or very close). Given that tstamp is
+ * already adjusted for cgroup, we could say that:
+ * tstamp - ctx->timestamp
+ * is equivalent to
+ * tstamp - cgrp->timestamp.
+ *
+ * Then, in perf_output_read(), the calculation would
+ * work with no changes because:
+ * - event is guaranteed scheduled in
+ * - no scheduled out in between
+ * - thus the timestamp would be the same
+ *
+ * But this is a bit hairy.
+ *
+ * So instead, we have an explicit cgroup call to remain
+ * within the same time source all along. We believe it
+ * is cleaner and simpler to understand.
+ */
+ if (is_cgroup_event(event))
+ perf_cgroup_set_shadow_time(event, tstamp);
+ else
+ event->shadow_ctx_time = tstamp - ctx->timestamp;
+}
+
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_event *event, int enable);
+
static int
event_sched_in(struct perf_event *event,
struct perf_cpu_context *cpuctx,
@@ -794,6 +1305,17 @@ event_sched_in(struct perf_event *event,
event->state = PERF_EVENT_STATE_ACTIVE;
event->oncpu = smp_processor_id();
+
+ /*
+ * Unthrottle events, since we scheduled we might have missed several
+ * ticks already, also for a heavily scheduling task there is little
+ * guarantee it'll get a tick in a timely manner.
+ */
+ if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
+ perf_log_throttle(event, 1);
+ event->hw.interrupts = 0;
+ }
+
/*
* The new state must be visible before we turn it on in the hardware:
*/
@@ -807,7 +1329,7 @@ event_sched_in(struct perf_event *event,
event->tstamp_running += tstamp - event->tstamp_stopped;
- event->shadow_ctx_time = tstamp - ctx->timestamp;
+ perf_set_shadow_time(event, ctx, tstamp);
if (!is_software_event(event))
cpuctx->active_oncpu++;
@@ -928,12 +1450,15 @@ static void add_event_to_ctx(struct perf_event *event,
event->tstamp_stopped = tstamp;
}
+static void perf_event_context_sched_in(struct perf_event_context *ctx,
+ struct task_struct *tsk);
+
/*
* Cross CPU call to install and enable a performance event
*
* Must be called with ctx->mutex held
*/
-static void __perf_install_in_context(void *info)
+static int __perf_install_in_context(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -942,21 +1467,22 @@ static void __perf_install_in_context(void *info)
int err;
/*
- * If this is a task context, we need to check whether it is
- * the current task context of this cpu. If not it has been
- * scheduled out before the smp call arrived.
- * Or possibly this is the right context but it isn't
- * on this cpu because it had no events.
+ * In case we're installing a new context to an already running task,
+ * could also happen before perf_event_task_sched_in() on architectures
+ * which do context switches with IRQs enabled.
*/
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
+ if (ctx->task && !cpuctx->task_ctx)
+ perf_event_context_sched_in(ctx, ctx->task);
raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
update_context_time(ctx);
+ /*
+ * update cgrp time only if current cgrp
+ * matches event->cgrp. Must be done before
+ * calling add_event_to_ctx()
+ */
+ update_cgrp_time_from_event(event);
add_event_to_ctx(event, ctx);
@@ -997,6 +1523,8 @@ static void __perf_install_in_context(void *info)
unlock:
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -1008,8 +1536,6 @@ unlock:
* If the event is attached to a task which is on a CPU we use a smp
* call to enable it in the task context. The task might have been
* scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
*/
static void
perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1544,8 @@ perf_install_in_context(struct perf_event_context *ctx,
{
struct task_struct *task = ctx->task;
+ lockdep_assert_held(&ctx->mutex);
+
event->ctx = ctx;
if (!task) {
@@ -1025,31 +1553,29 @@ perf_install_in_context(struct perf_event_context *ctx,
* Per cpu events are installed via an smp call and
* the install is always successful.
*/
- smp_call_function_single(cpu, __perf_install_in_context,
- event, 1);
+ cpu_function_call(cpu, __perf_install_in_context, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_install_in_context,
- event);
+ if (!task_function_call(task, __perf_install_in_context, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
- * we need to retry the smp call.
+ * If we failed to find a running task, but find the context active now
+ * that we've acquired the ctx->lock, retry.
*/
- if (ctx->is_active && list_empty(&event->group_entry)) {
+ if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
/*
- * The lock prevents that this context is scheduled in so we
- * can add the event safely, if it the call above did not
- * succeed.
+ * Since the task isn't running, it's safe to add the event, us holding
+ * the ctx->lock ensures the task won't get scheduled in.
*/
- if (list_empty(&event->group_entry))
- add_event_to_ctx(event, ctx);
+ add_event_to_ctx(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
}
@@ -1078,7 +1604,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
/*
* Cross CPU call to enable a performance event
*/
-static void __perf_event_enable(void *info)
+static int __perf_event_enable(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -1086,26 +1612,27 @@ static void __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
- /*
- * If this is a per-task event, need to check whether this
- * event's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
+ if (WARN_ON_ONCE(!ctx->is_active))
+ return -EINVAL;
raw_spin_lock(&ctx->lock);
- ctx->is_active = 1;
update_context_time(ctx);
if (event->state >= PERF_EVENT_STATE_INACTIVE)
goto unlock;
+
+ /*
+ * set current task's cgroup time reference point
+ */
+ perf_cgroup_set_timestamp(current, ctx);
+
__perf_event_mark_enabled(event, ctx);
- if (!event_filter_match(event))
+ if (!event_filter_match(event)) {
+ if (is_cgroup_event(event))
+ perf_cgroup_defer_enabled(event);
goto unlock;
+ }
/*
* If the event is in a group and isn't the group leader,
@@ -1138,6 +1665,8 @@ static void __perf_event_enable(void *info)
unlock:
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -1158,8 +1687,7 @@ void perf_event_enable(struct perf_event *event)
/*
* Enable the event on the cpu that it's on
*/
- smp_call_function_single(event->cpu, __perf_event_enable,
- event, 1);
+ cpu_function_call(event->cpu, __perf_event_enable, event);
return;
}
@@ -1178,8 +1706,15 @@ void perf_event_enable(struct perf_event *event)
event->state = PERF_EVENT_STATE_OFF;
retry:
+ if (!ctx->is_active) {
+ __perf_event_mark_enabled(event, ctx);
+ goto out;
+ }
+
raw_spin_unlock_irq(&ctx->lock);
- task_oncpu_function_call(task, __perf_event_enable, event);
+
+ if (!task_function_call(task, __perf_event_enable, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
@@ -1187,15 +1722,14 @@ retry:
* If the context is active and the event is still off,
* we need to retry the cross-call.
*/
- if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
+ if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+ /*
+ * task could have been flipped by a concurrent
+ * perf_event_context_sched_out()
+ */
+ task = ctx->task;
goto retry;
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_OFF)
- __perf_event_mark_enabled(event, ctx);
+ }
out:
raw_spin_unlock_irq(&ctx->lock);
@@ -1227,6 +1761,7 @@ static void ctx_sched_out(struct perf_event_context *ctx,
if (likely(!ctx->nr_events))
goto out;
update_context_time(ctx);
+ update_cgrp_time_from_cpuctx(cpuctx);
if (!ctx->nr_active)
goto out;
@@ -1339,8 +1874,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
}
}
-void perf_event_context_sched_out(struct task_struct *task, int ctxn,
- struct task_struct *next)
+static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+ struct task_struct *next)
{
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
@@ -1416,6 +1951,14 @@ void __perf_event_task_sched_out(struct task_struct *task,
for_each_task_context_nr(ctxn)
perf_event_context_sched_out(task, ctxn, next);
+
+ /*
+ * if cgroup events exist on this CPU, then we need
+ * to check if we have to switch out PMU state.
+ * cgroup events are system-wide mode only
+ */
+ if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
+ perf_cgroup_sched_out(task);
}
static void task_ctx_sched_out(struct perf_event_context *ctx,
@@ -1454,6 +1997,10 @@ ctx_pinned_sched_in(struct perf_event_context *ctx,
if (!event_filter_match(event))
continue;
+ /* may need to reset tstamp_enabled */
+ if (is_cgroup_event(event))
+ perf_cgroup_mark_enabled(event, ctx);
+
if (group_can_go_on(event, cpuctx, 1))
group_sched_in(event, cpuctx, ctx);
@@ -1486,6 +2033,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
if (!event_filter_match(event))
continue;
+ /* may need to reset tstamp_enabled */
+ if (is_cgroup_event(event))
+ perf_cgroup_mark_enabled(event, ctx);
+
if (group_can_go_on(event, cpuctx, can_add_hw)) {
if (group_sched_in(event, cpuctx, ctx))
can_add_hw = 0;
@@ -1496,15 +2047,19 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
static void
ctx_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
- enum event_type_t event_type)
+ enum event_type_t event_type,
+ struct task_struct *task)
{
+ u64 now;
+
raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
if (likely(!ctx->nr_events))
goto out;
- ctx->timestamp = perf_clock();
-
+ now = perf_clock();
+ ctx->timestamp = now;
+ perf_cgroup_set_timestamp(task, ctx);
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -1521,11 +2076,12 @@ out:
}
static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
- enum event_type_t event_type)
+ enum event_type_t event_type,
+ struct task_struct *task)
{
struct perf_event_context *ctx = &cpuctx->ctx;
- ctx_sched_in(ctx, cpuctx, event_type);
+ ctx_sched_in(ctx, cpuctx, event_type, task);
}
static void task_ctx_sched_in(struct perf_event_context *ctx,
@@ -1533,15 +2089,16 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
{
struct perf_cpu_context *cpuctx;
- cpuctx = __get_cpu_context(ctx);
+ cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
return;
- ctx_sched_in(ctx, cpuctx, event_type);
+ ctx_sched_in(ctx, cpuctx, event_type, NULL);
cpuctx->task_ctx = ctx;
}
-void perf_event_context_sched_in(struct perf_event_context *ctx)
+static void perf_event_context_sched_in(struct perf_event_context *ctx,
+ struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
@@ -1557,9 +2114,9 @@ void perf_event_context_sched_in(struct perf_event_context *ctx)
*/
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
- ctx_sched_in(ctx, cpuctx, EVENT_PINNED);
- cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
- ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
+ ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
+ ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
cpuctx->task_ctx = ctx;
@@ -1592,14 +2149,17 @@ void __perf_event_task_sched_in(struct task_struct *task)
if (likely(!ctx))
continue;
- perf_event_context_sched_in(ctx);
+ perf_event_context_sched_in(ctx, task);
}
+ /*
+ * if cgroup events exist on this CPU, then we need
+ * to check if we have to switch in PMU state.
+ * cgroup events are system-wide mode only
+ */
+ if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
+ perf_cgroup_sched_in(task);
}
-#define MAX_INTERRUPTS (~0ULL)
-
-static void perf_log_throttle(struct perf_event *event, int enable);
-
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
{
u64 frequency = event->attr.sample_freq;
@@ -1627,7 +2187,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
* Reduce accuracy by one bit such that @a and @b converge
* to a similar magnitude.
*/
-#define REDUCE_FLS(a, b) \
+#define REDUCE_FLS(a, b) \
do { \
if (a##_fls > b##_fls) { \
a >>= 1; \
@@ -1797,7 +2357,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
if (ctx)
rotate_ctx(ctx);
- cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
+ cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current);
if (ctx)
task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
@@ -1876,7 +2436,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
raw_spin_unlock(&ctx->lock);
- perf_event_context_sched_in(ctx);
+ perf_event_context_sched_in(ctx, ctx->task);
out:
local_irq_restore(flags);
}
@@ -1901,8 +2461,10 @@ static void __perf_event_read(void *info)
return;
raw_spin_lock(&ctx->lock);
- if (ctx->is_active)
+ if (ctx->is_active) {
update_context_time(ctx);
+ update_cgrp_time_from_event(event);
+ }
update_event_times(event);
if (event->state == PERF_EVENT_STATE_ACTIVE)
event->pmu->read(event);
@@ -1933,8 +2495,10 @@ static u64 perf_event_read(struct perf_event *event)
* (e.g., thread is blocked), in that case
* we cannot update context time
*/
- if (ctx->is_active)
+ if (ctx->is_active) {
update_context_time(ctx);
+ update_cgrp_time_from_event(event);
+ }
update_event_times(event);
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
@@ -2213,6 +2777,9 @@ errout:
}
+/*
+ * Returns a matching context with refcount and pincount.
+ */
static struct perf_event_context *
find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
{
@@ -2237,6 +2804,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
+ ++ctx->pin_count;
return ctx;
}
@@ -2250,6 +2818,7 @@ retry:
ctx = perf_lock_task_context(task, ctxn, &flags);
if (ctx) {
unclone_ctx(ctx);
+ ++ctx->pin_count;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
@@ -2271,8 +2840,10 @@ retry:
err = -ESRCH;
else if (task->perf_event_ctxp[ctxn])
err = -EAGAIN;
- else
+ else {
+ ++ctx->pin_count;
rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+ }
mutex_unlock(&task->perf_event_mutex);
if (unlikely(err)) {
@@ -2312,7 +2883,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_dec(&perf_task_events);
+ jump_label_dec(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
@@ -2321,6 +2892,10 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_task_events);
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
put_callchain_buffers();
+ if (is_cgroup_event(event)) {
+ atomic_dec(&per_cpu(perf_cgroup_events, event->cpu));
+ jump_label_dec(&perf_sched_events);
+ }
}
if (event->buffer) {
@@ -2328,6 +2903,9 @@ static void free_event(struct perf_event *event)
event->buffer = NULL;
}
+ if (is_cgroup_event(event))
+ perf_detach_cgroup(event);
+
if (event->destroy)
event->destroy(event);
@@ -4395,26 +4973,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
if (unlikely(!is_sampling_event(event)))
return 0;
- if (!throttle) {
- hwc->interrupts++;
- } else {
- if (hwc->interrupts != MAX_INTERRUPTS) {
- hwc->interrupts++;
- if (HZ * hwc->interrupts >
- (u64)sysctl_perf_event_sample_rate) {
- hwc->interrupts = MAX_INTERRUPTS;
- perf_log_throttle(event, 0);
- ret = 1;
- }
- } else {
- /*
- * Keep re-disabling events even though on the previous
- * pass we disabled it - just in case we raced with a
- * sched-in and the event got enabled again:
- */
+ if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
+ if (throttle) {
+ hwc->interrupts = MAX_INTERRUPTS;
+ perf_log_throttle(event, 0);
ret = 1;
}
- }
+ } else
+ hwc->interrupts++;
if (event->attr.freq) {
u64 now = perf_clock();
@@ -5051,6 +5617,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
u64 period;
event = container_of(hrtimer, struct perf_event, hw.hrtimer);
+
+ if (event->state != PERF_EVENT_STATE_ACTIVE)
+ return HRTIMER_NORESTART;
+
event->pmu->read(event);
perf_sample_data_init(&data, 0);
@@ -5077,9 +5647,6 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
if (!is_sampling_event(event))
return;
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hwc->hrtimer.function = perf_swevent_hrtimer;
-
period = local64_read(&hwc->period_left);
if (period) {
if (period < 0)
@@ -5106,6 +5673,30 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
}
}
+static void perf_swevent_init_hrtimer(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!is_sampling_event(event))
+ return;
+
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hwc->hrtimer.function = perf_swevent_hrtimer;
+
+ /*
+ * Since hrtimers have a fixed rate, we can do a static freq->period
+ * mapping and avoid the whole period adjust feedback stuff.
+ */
+ if (event->attr.freq) {
+ long freq = event->attr.sample_freq;
+
+ event->attr.sample_period = NSEC_PER_SEC / freq;
+ hwc->sample_period = event->attr.sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ event->attr.freq = 0;
+ }
+}
+
/*
* Software event: cpu wall time clock
*/
@@ -5158,6 +5749,8 @@ static int cpu_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK)
return -ENOENT;
+ perf_swevent_init_hrtimer(event);
+
return 0;
}
@@ -5213,16 +5806,9 @@ static void task_clock_event_del(struct perf_event *event, int flags)
static void task_clock_event_read(struct perf_event *event)
{
- u64 time;
-
- if (!in_nmi()) {
- update_context_time(event->ctx);
- time = event->ctx->time;
- } else {
- u64 now = perf_clock();
- u64 delta = now - event->ctx->timestamp;
- time = event->ctx->time + delta;
- }
+ u64 now = perf_clock();
+ u64 delta = now - event->ctx->timestamp;
+ u64 time = event->ctx->time + delta;
task_clock_event_update(event, time);
}
@@ -5235,6 +5821,8 @@ static int task_clock_event_init(struct perf_event *event)
if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK)
return -ENOENT;
+ perf_swevent_init_hrtimer(event);
+
return 0;
}
@@ -5506,17 +6094,22 @@ struct pmu *perf_init_event(struct perf_event *event)
{
struct pmu *pmu = NULL;
int idx;
+ int ret;
idx = srcu_read_lock(&pmus_srcu);
rcu_read_lock();
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
- if (pmu)
+ if (pmu) {
+ ret = pmu->event_init(event);
+ if (ret)
+ pmu = ERR_PTR(ret);
goto unlock;
+ }
list_for_each_entry_rcu(pmu, &pmus, entry) {
- int ret = pmu->event_init(event);
+ ret = pmu->event_init(event);
if (!ret)
goto unlock;
@@ -5642,7 +6235,7 @@ done:
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
- jump_label_inc(&perf_task_events);
+ jump_label_inc(&perf_sched_events);
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
@@ -5817,7 +6410,7 @@ SYSCALL_DEFINE5(perf_event_open,
int err;
/* for future expandability... */
- if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
+ if (flags & ~PERF_FLAG_ALL)
return -EINVAL;
err = perf_copy_attr(attr_uptr, &attr);
@@ -5834,6 +6427,15 @@ SYSCALL_DEFINE5(perf_event_open,
return -EINVAL;
}
+ /*
+ * In cgroup mode, the pid argument is used to pass the fd
+ * opened to the cgroup directory in cgroupfs. The cpu argument
+ * designates the cpu on which to monitor threads from that
+ * cgroup.
+ */
+ if ((flags & PERF_FLAG_PID_CGROUP) && (pid == -1 || cpu == -1))
+ return -EINVAL;
+
event_fd = get_unused_fd_flags(O_RDWR);
if (event_fd < 0)
return event_fd;
@@ -5851,7 +6453,7 @@ SYSCALL_DEFINE5(perf_event_open,
group_leader = NULL;
}
- if (pid != -1) {
+ if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
task = find_lively_task_by_vpid(pid);
if (IS_ERR(task)) {
err = PTR_ERR(task);
@@ -5865,6 +6467,19 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
+ if (flags & PERF_FLAG_PID_CGROUP) {
+ err = perf_cgroup_connect(pid, event, &attr, group_leader);
+ if (err)
+ goto err_alloc;
+ /*
+ * one more event:
+ * - that has cgroup constraint on event->cpu
+ * - that may need work on context switch
+ */
+ atomic_inc(&per_cpu(perf_cgroup_events, event->cpu));
+ jump_label_inc(&perf_sched_events);
+ }
+
/*
* Special case software events and allow them to be part of
* any hardware group.
@@ -5950,10 +6565,10 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_context *gctx = group_leader->ctx;
mutex_lock(&gctx->mutex);
- perf_event_remove_from_context(group_leader);
+ perf_remove_from_context(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
- perf_event_remove_from_context(sibling);
+ perf_remove_from_context(sibling);
put_ctx(gctx);
}
mutex_unlock(&gctx->mutex);
@@ -5976,6 +6591,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
+ perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
event->owner = current;
@@ -6001,6 +6617,7 @@ SYSCALL_DEFINE5(perf_event_open,
return event_fd;
err_context:
+ perf_unpin_context(ctx);
put_ctx(ctx);
err_alloc:
free_event(event);
@@ -6051,6 +6668,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
+ perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
return event;
@@ -6104,7 +6722,7 @@ __perf_event_exit_task(struct perf_event *child_event,
{
struct perf_event *parent_event;
- perf_event_remove_from_context(child_event);
+ perf_remove_from_context(child_event);
parent_event = child_event->parent;
/*
@@ -6411,7 +7029,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
return 0;
}
- child_ctx = child->perf_event_ctxp[ctxn];
+ child_ctx = child->perf_event_ctxp[ctxn];
if (!child_ctx) {
/*
* This is executed from the parent task context, so
@@ -6526,6 +7144,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
+ put_ctx(parent_ctx);
return ret;
}
@@ -6595,9 +7214,9 @@ static void __perf_event_exit_context(void *__info)
perf_pmu_rotate_stop(ctx->pmu);
list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
- __perf_event_remove_from_context(event);
+ __perf_remove_from_context(event);
list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
- __perf_event_remove_from_context(event);
+ __perf_remove_from_context(event);
}
static void perf_event_exit_cpu_context(int cpu)
@@ -6721,3 +7340,83 @@ unlock:
return ret;
}
device_initcall(perf_event_sysfs_init);
+
+#ifdef CONFIG_CGROUP_PERF
+static struct cgroup_subsys_state *perf_cgroup_create(
+ struct cgroup_subsys *ss, struct cgroup *cont)
+{
+ struct perf_cgroup *jc;
+
+ jc = kzalloc(sizeof(*jc), GFP_KERNEL);
+ if (!jc)
+ return ERR_PTR(-ENOMEM);
+
+ jc->info = alloc_percpu(struct perf_cgroup_info);
+ if (!jc->info) {
+ kfree(jc);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &jc->css;
+}
+
+static void perf_cgroup_destroy(struct cgroup_subsys *ss,
+ struct cgroup *cont)
+{
+ struct perf_cgroup *jc;
+ jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
+ struct perf_cgroup, css);
+ free_percpu(jc->info);
+ kfree(jc);
+}
+
+static int __perf_cgroup_move(void *info)
+{
+ struct task_struct *task = info;
+ perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+ return 0;
+}
+
+static void perf_cgroup_move(struct task_struct *task)
+{
+ task_function_call(task, __perf_cgroup_move, task);
+}
+
+static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task,
+ bool threadgroup)
+{
+ perf_cgroup_move(task);
+ if (threadgroup) {
+ struct task_struct *c;
+ rcu_read_lock();
+ list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+ perf_cgroup_move(c);
+ }
+ rcu_read_unlock();
+ }
+}
+
+static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task)
+{
+ /*
+ * cgroup_exit() is called in the copy_process() failure path.
+ * Ignore this case since the task hasn't run yet, this avoids
+ * trying to poke a half freed task state from generic code.
+ */
+ if (!(task->flags & PF_EXITING))
+ return;
+
+ perf_cgroup_move(task);
+}
+
+struct cgroup_subsys perf_subsys = {
+ .name = "perf_event",
+ .subsys_id = perf_subsys_id,
+ .create = perf_cgroup_create,
+ .destroy = perf_cgroup_destroy,
+ .exit = perf_cgroup_exit,
+ .attach = perf_cgroup_attach,
+};
+#endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index aeaa7f846821..0da058bff8eb 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -103,11 +103,14 @@ static struct pm_qos_object *pm_qos_array[] = {
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos);
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos);
static int pm_qos_power_open(struct inode *inode, struct file *filp);
static int pm_qos_power_release(struct inode *inode, struct file *filp);
static const struct file_operations pm_qos_power_fops = {
.write = pm_qos_power_write,
+ .read = pm_qos_power_read,
.open = pm_qos_power_open,
.release = pm_qos_power_release,
.llseek = noop_llseek,
@@ -376,6 +379,27 @@ static int pm_qos_power_release(struct inode *inode, struct file *filp)
}
+static ssize_t pm_qos_power_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *f_pos)
+{
+ s32 value;
+ unsigned long flags;
+ struct pm_qos_object *o;
+ struct pm_qos_request_list *pm_qos_req = filp->private_data;;
+
+ if (!pm_qos_req)
+ return -EINVAL;
+ if (!pm_qos_request_active(pm_qos_req))
+ return -EINVAL;
+
+ o = pm_qos_array[pm_qos_req->pm_qos_class];
+ spin_lock_irqsave(&pm_qos_lock, flags);
+ value = pm_qos_get_value(o);
+ spin_unlock_irqrestore(&pm_qos_lock, flags);
+
+ return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32));
+}
+
static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
size_t count, loff_t *f_pos)
{
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb7173850e..67fea9d25d55 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p)
return p->utime;
}
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
int error = check_clock(which_clock);
if (!error) {
@@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
return error;
}
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+static int
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
{
/*
* You can never reset a CPU clock, but we check for other errors
@@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
}
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
+static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
const pid_t pid = CPUCLOCK_PID(which_clock);
int error = -EINVAL;
@@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
* new timer already all-zeros initialized.
*/
-int posix_cpu_timer_create(struct k_itimer *new_timer)
+static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
int ret = 0;
const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_del(struct k_itimer *timer)
+static int posix_cpu_timer_del(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
int ret = 0;
@@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old)
+static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+ struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
union cpu_time_count old_expires, new_expires, old_incr, val;
@@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
return ret;
}
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
union cpu_time_count now;
struct task_struct *p = timer->it.cpu.task;
@@ -1481,11 +1483,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
return error;
}
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
+
+static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
+ struct timespec *rqtp, struct timespec __user *rmtp)
{
struct restart_block *restart_block =
- &current_thread_info()->restart_block;
+ &current_thread_info()->restart_block;
struct itimerspec it;
int error;
@@ -1501,56 +1505,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
if (error == -ERESTART_RESTARTBLOCK) {
- if (flags & TIMER_ABSTIME)
+ if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = rqtp->tv_sec;
- restart_block->arg3 = rqtp->tv_nsec;
+ restart_block->nanosleep.index = which_clock;
+ restart_block->nanosleep.rmtp = rmtp;
+ restart_block->nanosleep.expires = timespec_to_ns(rqtp);
}
return error;
}
-long posix_cpu_nsleep_restart(struct restart_block *restart_block)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
- struct timespec __user *rmtp;
+ clockid_t which_clock = restart_block->nanosleep.index;
struct timespec t;
struct itimerspec it;
int error;
- rmtp = (struct timespec __user *) restart_block->arg1;
- t.tv_sec = restart_block->arg2;
- t.tv_nsec = restart_block->arg3;
+ t = ns_to_timespec(restart_block->nanosleep.expires);
- restart_block->fn = do_no_restart_syscall;
error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
if (error == -ERESTART_RESTARTBLOCK) {
+ struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
- restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = t.tv_sec;
- restart_block->arg3 = t.tv_nsec;
+ restart_block->nanosleep.expires = timespec_to_ns(&t);
}
return error;
}
-
#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
@@ -1594,38 +1589,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
timer->it_clock = THREAD_CLOCK;
return posix_cpu_timer_create(timer);
}
-static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
-{
- return -EINVAL;
-}
-static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
-{
- return -EINVAL;
-}
+
+struct k_clock clock_posix_cpu = {
+ .clock_getres = posix_cpu_clock_getres,
+ .clock_set = posix_cpu_clock_set,
+ .clock_get = posix_cpu_clock_get,
+ .timer_create = posix_cpu_timer_create,
+ .nsleep = posix_cpu_nsleep,
+ .nsleep_restart = posix_cpu_nsleep_restart,
+ .timer_set = posix_cpu_timer_set,
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+};
static __init int init_posix_cpu_timers(void)
{
struct k_clock process = {
- .clock_getres = process_cpu_clock_getres,
- .clock_get = process_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = process_cpu_timer_create,
- .nsleep = process_cpu_nsleep,
- .nsleep_restart = process_cpu_nsleep_restart,
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get = process_cpu_clock_get,
+ .timer_create = process_cpu_timer_create,
+ .nsleep = process_cpu_nsleep,
+ .nsleep_restart = process_cpu_nsleep_restart,
};
struct k_clock thread = {
- .clock_getres = thread_cpu_clock_getres,
- .clock_get = thread_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = thread_cpu_timer_create,
- .nsleep = thread_cpu_nsleep,
- .nsleep_restart = thread_cpu_nsleep_restart,
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get = thread_cpu_clock_get,
+ .timer_create = thread_cpu_timer_create,
};
struct timespec ts;
- register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
- register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+ posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+ posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
cputime_to_timespec(cputime_one_jiffy, &ts);
onecputick = ts.tv_nsec;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb2bc53..4c0124919f9a 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/idr.h>
+#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
@@ -81,6 +82,14 @@ static DEFINE_SPINLOCK(idr_lock);
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
+/*
+ * parisc wants ENOTSUP instead of EOPNOTSUPP
+ */
+#ifndef ENOTSUP
+# define ENANOSLEEP_NOTSUP EOPNOTSUPP
+#else
+# define ENANOSLEEP_NOTSUP ENOTSUP
+#endif
/*
* The timer ID is turned into a timer address by idr_find().
@@ -94,11 +103,7 @@ static DEFINE_SPINLOCK(idr_lock);
/*
* CLOCKs: The POSIX standard calls for a couple of clocks and allows us
* to implement others. This structure defines the various
- * clocks and allows the possibility of adding others. We
- * provide an interface to add clocks to the table and expect
- * the "arch" code to add at least one clock that is high
- * resolution. Here we define the standard CLOCK_REALTIME as a
- * 1/HZ resolution clock.
+ * clocks.
*
* RESOLUTION: Clock resolution is used to round up timer and interval
* times, NOT to report clock times, which are reported with as
@@ -108,20 +113,13 @@ static DEFINE_SPINLOCK(idr_lock);
* necessary code is written. The standard says we should say
* something about this issue in the documentation...
*
- * FUNCTIONS: The CLOCKs structure defines possible functions to handle
- * various clock functions. For clocks that use the standard
- * system timer code these entries should be NULL. This will
- * allow dispatch without the overhead of indirect function
- * calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
- * must supply functions here, even if the function just returns
- * ENOSYS. The standard POSIX timer management code assumes the
- * following: 1.) The k_itimer struct (sched.h) is used for the
- * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
- * fields are not modified by timer code.
+ * FUNCTIONS: The CLOCKs structure defines possible functions to
+ * handle various clock functions.
*
- * At this time all functions EXCEPT clock_nanosleep can be
- * redirected by the CLOCKS structure. Clock_nanosleep is in
- * there, but the code ignores it.
+ * The standard POSIX timer management code assumes the
+ * following: 1.) The k_itimer struct (sched.h) is used for
+ * the timer. 2.) The list, it_lock, it_clock, it_id and
+ * it_pid fields are not modified by timer code.
*
* Permissions: It is assumed that the clock_settime() function defined
* for each clock will take care of permission checks. Some
@@ -138,6 +136,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
*/
static int common_nsleep(const clockid_t, int flags, struct timespec *t,
struct timespec __user *rmtp);
+static int common_timer_create(struct k_itimer *new_timer);
static void common_timer_get(struct k_itimer *, struct itimerspec *);
static int common_timer_set(struct k_itimer *, int,
struct itimerspec *, struct itimerspec *);
@@ -158,76 +157,24 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
spin_unlock_irqrestore(&timr->it_lock, flags);
}
-/*
- * Call the k_clock hook function if non-null, or the default function.
- */
-#define CLOCK_DISPATCH(clock, call, arglist) \
- ((clock) < 0 ? posix_cpu_##call arglist : \
- (posix_clocks[clock].call != NULL \
- ? (*posix_clocks[clock].call) arglist : common_##call arglist))
-
-/*
- * Default clock hook functions when the struct k_clock passed
- * to register_posix_clock leaves a function pointer null.
- *
- * The function common_CALL is the default implementation for
- * the function pointer CALL in struct k_clock.
- */
-
-static inline int common_clock_getres(const clockid_t which_clock,
- struct timespec *tp)
-{
- tp->tv_sec = 0;
- tp->tv_nsec = posix_clocks[which_clock].res;
- return 0;
-}
-
-/*
- * Get real time for posix timers
- */
-static int common_clock_get(clockid_t which_clock, struct timespec *tp)
+/* Get clock_realtime */
+static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
{
ktime_get_real_ts(tp);
return 0;
}
-static inline int common_clock_set(const clockid_t which_clock,
- struct timespec *tp)
+/* Set clock_realtime */
+static int posix_clock_realtime_set(const clockid_t which_clock,
+ const struct timespec *tp)
{
return do_sys_settimeofday(tp, NULL);
}
-static int common_timer_create(struct k_itimer *new_timer)
-{
- hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
- return 0;
-}
-
-static int no_timer_create(struct k_itimer *new_timer)
-{
- return -EOPNOTSUPP;
-}
-
-static int no_nsleep(const clockid_t which_clock, int flags,
- struct timespec *tsave, struct timespec __user *rmtp)
-{
- return -EOPNOTSUPP;
-}
-
-/*
- * Return nonzero if we know a priori this clockid_t value is bogus.
- */
-static inline int invalid_clockid(const clockid_t which_clock)
+static int posix_clock_realtime_adj(const clockid_t which_clock,
+ struct timex *t)
{
- if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
- return 0;
- if ((unsigned) which_clock >= MAX_CLOCKS)
- return 1;
- if (posix_clocks[which_clock].clock_getres != NULL)
- return 0;
- if (posix_clocks[which_clock].res != 0)
- return 0;
- return 1;
+ return do_adjtimex(t);
}
/*
@@ -240,7 +187,7 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp)
}
/*
- * Get monotonic time for posix timers
+ * Get monotonic-raw time for posix timers
*/
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
{
@@ -267,46 +214,70 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;
}
+
+static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
+{
+ get_monotonic_boottime(tp);
+ return 0;
+}
+
+
/*
* Initialize everything, well, just everything in Posix clocks/timers ;)
*/
static __init int init_posix_timers(void)
{
struct k_clock clock_realtime = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_clock_realtime_get,
+ .clock_set = posix_clock_realtime_set,
+ .clock_adj = posix_clock_realtime_adj,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_ktime_get_ts,
- .clock_set = do_posix_clock_nosettime,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_ktime_get_ts,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic_raw = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_get_monotonic_raw,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_monotonic_raw,
};
struct k_clock clock_realtime_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_realtime_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_realtime_coarse,
};
struct k_clock clock_monotonic_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_monotonic_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_monotonic_coarse,
+ };
+ struct k_clock clock_boottime = {
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_boottime,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
- register_posix_clock(CLOCK_REALTIME, &clock_realtime);
- register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
- register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
+ posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
+ posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -482,17 +453,29 @@ static struct pid *good_sigevent(sigevent_t * event)
return task_pid(rtn);
}
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
+void posix_timers_register_clock(const clockid_t clock_id,
+ struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk("POSIX clock register failed for clock_id %d\n",
+ printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ clock_id);
+ return;
+ }
+
+ if (!new_clock->clock_get) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ clock_id);
+ return;
+ }
+ if (!new_clock->clock_getres) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
posix_clocks[clock_id] = *new_clock;
}
-EXPORT_SYMBOL_GPL(register_posix_clock);
+EXPORT_SYMBOL_GPL(posix_timers_register_clock);
static struct k_itimer * alloc_posix_timer(void)
{
@@ -523,19 +506,39 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
kmem_cache_free(posix_timers_cache, tmr);
}
+static struct k_clock *clockid_to_kclock(const clockid_t id)
+{
+ if (id < 0)
+ return (id & CLOCKFD_MASK) == CLOCKFD ?
+ &clock_posix_dynamic : &clock_posix_cpu;
+
+ if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
+ return NULL;
+ return &posix_clocks[id];
+}
+
+static int common_timer_create(struct k_itimer *new_timer)
+{
+ hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
+ return 0;
+}
+
/* Create a POSIX.1b interval timer. */
SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
struct sigevent __user *, timer_event_spec,
timer_t __user *, created_timer_id)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct k_itimer *new_timer;
int error, new_timer_id;
sigevent_t event;
int it_id_set = IT_ID_NOT_SET;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->timer_create)
+ return -EOPNOTSUPP;
new_timer = alloc_posix_timer();
if (unlikely(!new_timer))
@@ -597,7 +600,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
goto out;
}
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ error = kc->timer_create(new_timer);
if (error)
goto out;
@@ -607,7 +610,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
spin_unlock_irq(&current->sighand->siglock);
return 0;
- /*
+ /*
* In the case of the timer belonging to another task, after
* the task is unlocked, the timer is owned by the other task
* and may cease to exist at any time. Don't use or modify
@@ -709,22 +712,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
{
- struct k_itimer *timr;
struct itimerspec cur_setting;
+ struct k_itimer *timr;
+ struct k_clock *kc;
unsigned long flags;
+ int ret = 0;
timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
- CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_get))
+ ret = -EINVAL;
+ else
+ kc->timer_get(timr, &cur_setting);
unlock_timer(timr, flags);
- if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
return -EFAULT;
- return 0;
+ return ret;
}
/*
@@ -813,6 +822,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
int error = 0;
unsigned long flag;
struct itimerspec *rtn = old_setting ? &old_spec : NULL;
+ struct k_clock *kc;
if (!new_setting)
return -EINVAL;
@@ -828,8 +838,11 @@ retry:
if (!timr)
return -EINVAL;
- error = CLOCK_DISPATCH(timr->it_clock, timer_set,
- (timr, flags, &new_spec, rtn));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_set))
+ error = -EINVAL;
+ else
+ error = kc->timer_set(timr, flags, &new_spec, rtn);
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
@@ -844,7 +857,7 @@ retry:
return error;
}
-static inline int common_timer_del(struct k_itimer *timer)
+static int common_timer_del(struct k_itimer *timer)
{
timer->it.real.interval.tv64 = 0;
@@ -855,7 +868,11 @@ static inline int common_timer_del(struct k_itimer *timer)
static inline int timer_delete_hook(struct k_itimer *timer)
{
- return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer));
+ struct k_clock *kc = clockid_to_kclock(timer->it_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->timer_del))
+ return -EINVAL;
+ return kc->timer_del(timer);
}
/* Delete a POSIX.1b interval timer. */
@@ -927,69 +944,76 @@ void exit_itimers(struct signal_struct *sig)
}
}
-/* Not available / possible... functions */
-int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
-
-int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
- struct timespec *t, struct timespec __user *r)
-{
-#ifndef ENOTSUP
- return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
-#else /* parisc does define it separately. */
- return -ENOTSUP;
-#endif
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
-
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
const struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec new_tp;
- if (invalid_clockid(which_clock))
+ if (!kc || !kc->clock_set)
return -EINVAL;
+
if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
- return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
+ return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
struct timespec __user *,tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_get,
- (which_clock, &kernel_tp));
+
+ error = kc->clock_get(which_clock, &kernel_tp);
+
if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
error = -EFAULT;
return error;
+}
+
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+ struct timex __user *, utx)
+{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+ struct timex ktx;
+ int err;
+
+ if (!kc)
+ return -EINVAL;
+ if (!kc->clock_adj)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&ktx, utx, sizeof(ktx)))
+ return -EFAULT;
+
+ err = kc->clock_adj(which_clock, &ktx);
+
+ if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
+ return -EFAULT;
+ return err;
}
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec rtn_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_getres,
- (which_clock, &rtn_tp));
+ error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) {
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
error = -EFAULT;
- }
return error;
}
@@ -1009,10 +1033,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
const struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec t;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->nsleep)
+ return -ENANOSLEEP_NOTSUP;
if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
return -EFAULT;
@@ -1020,27 +1047,20 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!timespec_valid(&t))
return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep,
- (which_clock, flags, &t, rmtp));
-}
-
-/*
- * nanosleep_restart for monotonic and realtime clocks
- */
-static int common_nsleep_restart(struct restart_block *restart_block)
-{
- return hrtimer_nanosleep_restart(restart_block);
+ return kc->nsleep(which_clock, flags, &t, rmtp);
}
/*
* This will restart clock_nanosleep. This is required only by
* compat_clock_nanosleep_restart for now.
*/
-long
-clock_nanosleep_restart(struct restart_block *restart_block)
+long clock_nanosleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
+ clockid_t which_clock = restart_block->nanosleep.index;
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
+ return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep_restart,
- (restart_block));
+ return kc->nsleep_restart(restart_block);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 265729966ece..4603f08dc47b 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -1,125 +1,12 @@
-config PM
- bool "Power Management support"
- depends on !IA64_HP_SIM
- ---help---
- "Power Management" means that parts of your computer are shut
- off or put into a power conserving "sleep" mode if they are not
- being used. There are two competing standards for doing this: APM
- and ACPI. If you want to use either one, say Y here and then also
- to the requisite support below.
-
- Power Management is most important for battery powered laptop
- computers; if you have a laptop, check out the Linux Laptop home
- page on the WWW at <http://www.linux-on-laptops.com/> or
- Tuxmobil - Linux on Mobile Computers at <http://www.tuxmobil.org/>
- and the Battery Powered Linux mini-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>.
-
- Note that, even if you say N here, Linux on the x86 architecture
- will issue the hlt instruction if nothing is to be done, thereby
- sending the processor to sleep and saving power.
-
-config PM_DEBUG
- bool "Power Management Debug Support"
- depends on PM
- ---help---
- This option enables various debugging support in the Power Management
- code. This is helpful when debugging and reporting PM bugs, like
- suspend support.
-
-config PM_ADVANCED_DEBUG
- bool "Extra PM attributes in sysfs for low-level debugging/testing"
- depends on PM_DEBUG
- default n
- ---help---
- Add extra sysfs attributes allowing one to access some Power Management
- fields of device objects from user space. If you are not a kernel
- developer interested in debugging/testing Power Management, say "no".
-
-config PM_VERBOSE
- bool "Verbose Power Management debugging"
- depends on PM_DEBUG
- default n
- ---help---
- This option enables verbose messages from the Power Management code.
-
-config CAN_PM_TRACE
- def_bool y
- depends on PM_DEBUG && PM_SLEEP && EXPERIMENTAL
-
-config PM_TRACE
- bool
- help
- This enables code to save the last PM event point across
- reboot. The architecture needs to support this, x86 for
- example does by saving things in the RTC, see below.
-
- The architecture specific code must provide the extern
- functions from <linux/resume-trace.h> as well as the
- <asm/resume-trace.h> header with a TRACE_RESUME() macro.
-
- The way the information is presented is architecture-
- dependent, x86 will print the information during a
- late_initcall.
-
-config PM_TRACE_RTC
- bool "Suspend/resume event tracing"
- depends on CAN_PM_TRACE
- depends on X86
- select PM_TRACE
- default n
- ---help---
- This enables some cheesy code to save the last PM event point in the
- RTC across reboots, so that you can debug a machine that just hangs
- during suspend (or more commonly, during resume).
-
- To use this debugging feature you should attempt to suspend the
- machine, reboot it and then run
-
- dmesg -s 1000000 | grep 'hash matches'
-
- CAUTION: this option will cause your machine's real-time clock to be
- set to an invalid time after a resume.
-
-config PM_SLEEP_SMP
- bool
- depends on SMP
- depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
- depends on PM_SLEEP
- select HOTPLUG
- select HOTPLUG_CPU
- default y
-
-config PM_SLEEP
- bool
- depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
- default y
-
-config PM_SLEEP_ADVANCED_DEBUG
- bool
- depends on PM_ADVANCED_DEBUG
- default n
-
config SUSPEND
bool "Suspend to RAM and standby"
- depends on PM && ARCH_SUSPEND_POSSIBLE
+ depends on ARCH_SUSPEND_POSSIBLE
default y
---help---
Allow the system to enter sleep states in which main memory is
powered and thus its contents are preserved, such as the
suspend-to-RAM state (e.g. the ACPI S3 state).
-config PM_TEST_SUSPEND
- bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
- ---help---
- This option will let you suspend your machine during bootup, and
- make it wake up a few seconds later using an RTC wakeup alarm.
- Enable this with a kernel parameter like "test_suspend=mem".
-
- You probably want to have your system's RTC driver statically
- linked, ensuring that it's available when this test runs.
-
config SUSPEND_FREEZER
bool "Enable freezer for suspend to RAM/standby" \
if ARCH_WANTS_FREEZER_CONTROL || BROKEN
@@ -133,7 +20,7 @@ config SUSPEND_FREEZER
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
- depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
+ depends on SWAP && ARCH_HIBERNATION_POSSIBLE
select LZO_COMPRESS
select LZO_DECOMPRESS
---help---
@@ -196,6 +83,106 @@ config PM_STD_PARTITION
suspended image to. It will simply pick the first available swap
device.
+config PM_SLEEP
+ def_bool y
+ depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
+
+config PM_SLEEP_SMP
+ def_bool y
+ depends on SMP
+ depends on ARCH_SUSPEND_POSSIBLE || ARCH_HIBERNATION_POSSIBLE
+ depends on PM_SLEEP
+ select HOTPLUG
+ select HOTPLUG_CPU
+
+config PM_RUNTIME
+ bool "Run-time PM core functionality"
+ depends on !IA64_HP_SIM
+ ---help---
+ Enable functionality allowing I/O devices to be put into energy-saving
+ (low power) states at run time (or autosuspended) after a specified
+ period of inactivity and woken up in response to a hardware-generated
+ wake-up event or a driver's request.
+
+ Hardware support is generally required for this functionality to work
+ and the bus type drivers of the buses the devices are on are
+ responsible for the actual handling of the autosuspend requests and
+ wake-up events.
+
+config PM
+ def_bool y
+ depends on PM_SLEEP || PM_RUNTIME
+
+config PM_DEBUG
+ bool "Power Management Debug Support"
+ depends on PM
+ ---help---
+ This option enables various debugging support in the Power Management
+ code. This is helpful when debugging and reporting PM bugs, like
+ suspend support.
+
+config PM_VERBOSE
+ bool "Verbose Power Management debugging"
+ depends on PM_DEBUG
+ ---help---
+ This option enables verbose messages from the Power Management code.
+
+config PM_ADVANCED_DEBUG
+ bool "Extra PM attributes in sysfs for low-level debugging/testing"
+ depends on PM_DEBUG
+ ---help---
+ Add extra sysfs attributes allowing one to access some Power Management
+ fields of device objects from user space. If you are not a kernel
+ developer interested in debugging/testing Power Management, say "no".
+
+config PM_TEST_SUSPEND
+ bool "Test suspend/resume and wakealarm during bootup"
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+ ---help---
+ This option will let you suspend your machine during bootup, and
+ make it wake up a few seconds later using an RTC wakeup alarm.
+ Enable this with a kernel parameter like "test_suspend=mem".
+
+ You probably want to have your system's RTC driver statically
+ linked, ensuring that it's available when this test runs.
+
+config CAN_PM_TRACE
+ def_bool y
+ depends on PM_DEBUG && PM_SLEEP
+
+config PM_TRACE
+ bool
+ help
+ This enables code to save the last PM event point across
+ reboot. The architecture needs to support this, x86 for
+ example does by saving things in the RTC, see below.
+
+ The architecture specific code must provide the extern
+ functions from <linux/resume-trace.h> as well as the
+ <asm/resume-trace.h> header with a TRACE_RESUME() macro.
+
+ The way the information is presented is architecture-
+ dependent, x86 will print the information during a
+ late_initcall.
+
+config PM_TRACE_RTC
+ bool "Suspend/resume event tracing"
+ depends on CAN_PM_TRACE
+ depends on X86
+ select PM_TRACE
+ ---help---
+ This enables some cheesy code to save the last PM event point in the
+ RTC across reboots, so that you can debug a machine that just hangs
+ during suspend (or more commonly, during resume).
+
+ To use this debugging feature you should attempt to suspend the
+ machine, reboot it and then run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+ CAUTION: this option will cause your machine's real-time clock to be
+ set to an invalid time after a resume.
+
config APM_EMULATION
tristate "Advanced Power Management Emulation"
depends on PM && SYS_SUPPORTS_APM_EMULATION
@@ -222,31 +209,11 @@ config APM_EMULATION
anything, try disabling/enabling this option (or disabling/enabling
APM in your BIOS).
-config PM_RUNTIME
- bool "Run-time PM core functionality"
- depends on PM
- ---help---
- Enable functionality allowing I/O devices to be put into energy-saving
- (low power) states at run time (or autosuspended) after a specified
- period of inactivity and woken up in response to a hardware-generated
- wake-up event or a driver's request.
-
- Hardware support is generally required for this functionality to work
- and the bus type drivers of the buses the devices are on are
- responsible for the actual handling of the autosuspend requests and
- wake-up events.
-
-config PM_OPS
- bool
- depends on PM_SLEEP || PM_RUNTIME
- default y
-
config ARCH_HAS_OPP
bool
config PM_OPP
bool "Operating Performance Point (OPP) Layer library"
- depends on PM
depends on ARCH_HAS_OPP
---help---
SOCs have a standard set of tuples consisting of frequency and
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1832bd264219..aeabd26e3342 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -23,6 +23,7 @@
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/gfp.h>
+#include <linux/syscore_ops.h>
#include <scsi/scsi_scan.h>
#include <asm/suspend.h>
@@ -272,6 +273,8 @@ static int create_image(int platform_mode)
local_irq_disable();
error = sysdev_suspend(PMSG_FREEZE);
+ if (!error)
+ error = syscore_suspend();
if (error) {
printk(KERN_ERR "PM: Some system devices failed to power down, "
"aborting hibernation\n");
@@ -295,6 +298,7 @@ static int create_image(int platform_mode)
}
Power_up:
+ syscore_resume();
sysdev_resume();
/* NOTE: dpm_resume_noirq() is just a resume() for devices
* that suspended with irqs off ... no overall powerup.
@@ -403,6 +407,8 @@ static int resume_target_kernel(bool platform_mode)
local_irq_disable();
error = sysdev_suspend(PMSG_QUIESCE);
+ if (!error)
+ error = syscore_suspend();
if (error)
goto Enable_irqs;
@@ -429,6 +435,7 @@ static int resume_target_kernel(bool platform_mode)
restore_processor_state();
touch_softlockup_watchdog();
+ syscore_resume();
sysdev_resume();
Enable_irqs:
@@ -516,6 +523,7 @@ int hibernation_platform_enter(void)
local_irq_disable();
sysdev_suspend(PMSG_HIBERNATE);
+ syscore_suspend();
if (pm_wakeup_pending()) {
error = -EAGAIN;
goto Power_up;
@@ -526,6 +534,7 @@ int hibernation_platform_enter(void)
while (1);
Power_up:
+ syscore_resume();
sysdev_resume();
local_irq_enable();
enable_nonboot_cpus();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7b5db6a8561e..8eaba5f27b10 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -17,9 +17,6 @@
DEFINE_MUTEX(pm_mutex);
-unsigned int pm_flags;
-EXPORT_SYMBOL(pm_flags);
-
#ifdef CONFIG_PM_SLEEP
/* Routines for PM-transition notifications */
@@ -326,7 +323,7 @@ EXPORT_SYMBOL_GPL(pm_wq);
static int __init pm_start_workqueue(void)
{
- pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0);
+ pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0);
return pm_wq ? 0 : -ENOMEM;
}
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d6d2a10320e0..0cf3a27a6c9d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
*/
#define TIMEOUT (20 * HZ)
-static inline int freezeable(struct task_struct * p)
+static inline int freezable(struct task_struct * p)
{
if ((p == current) ||
(p->flags & PF_NOFREEZE) ||
@@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only)
todo = 0;
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (frozen(p) || !freezeable(p))
+ if (frozen(p) || !freezable(p))
continue;
if (!freeze_task(p, sig_only))
@@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
- if (!freezeable(p))
+ if (!freezable(p))
continue;
if (nosig_only && should_send_signal(p))
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 0dac75ea4456..ca0aacc24874 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -42,15 +42,15 @@ static void swsusp_unset_page_forbidden(struct page *);
/*
* Preferred image size in bytes (tunable via /sys/power/image_size).
- * When it is set to N, swsusp will do its best to ensure the image
- * size will not exceed N bytes, but if that is impossible, it will
- * try to create the smallest image possible.
+ * When it is set to N, the image creating code will do its best to
+ * ensure the image size will not exceed N bytes, but if that is
+ * impossible, it will try to create the smallest image possible.
*/
unsigned long image_size;
void __init hibernate_image_size_init(void)
{
- image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
+ image_size = (totalram_pages / 3) * PAGE_SIZE;
}
/* List of PBEs needed for restoring the pages that were allocated before
@@ -1519,11 +1519,8 @@ static int
swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
unsigned int nr_pages, unsigned int nr_highmem)
{
- int error = 0;
-
if (nr_highmem > 0) {
- error = get_highmem_buffer(PG_ANY);
- if (error)
+ if (get_highmem_buffer(PG_ANY))
goto err_out;
if (nr_highmem > alloc_highmem) {
nr_highmem -= alloc_highmem;
@@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
err_out:
swsusp_free();
- return error;
+ return -ENOMEM;
}
asmlinkage int swsusp_save(void)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index de6f86bfa303..2814c32aed51 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -22,6 +22,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/suspend.h>
+#include <linux/syscore_ops.h>
#include <trace/events/power.h>
#include "power.h"
@@ -163,11 +164,14 @@ static int suspend_enter(suspend_state_t state)
BUG_ON(!irqs_disabled());
error = sysdev_suspend(PMSG_SUSPEND);
+ if (!error)
+ error = syscore_suspend();
if (!error) {
if (!(suspend_test(TEST_CORE) || pm_wakeup_pending())) {
error = suspend_ops->enter(state);
events_check_enabled = false;
}
+ syscore_resume();
sysdev_resume();
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1708b1e2972d..e2302e40b360 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
return !err;
}
-int ptrace_attach(struct task_struct *task)
+static int ptrace_attach(struct task_struct *task)
{
int retval;
@@ -219,7 +219,7 @@ out:
* Performs checks and sets PT_PTRACED.
* Should be used by all ptrace implementations for PTRACE_TRACEME.
*/
-int ptrace_traceme(void)
+static int ptrace_traceme(void)
{
int ret = -EPERM;
@@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
return false;
}
-int ptrace_detach(struct task_struct *child, unsigned int data)
+static int ptrace_detach(struct task_struct *child, unsigned int data)
{
bool dead = false;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a23a57a976d1..f3240e987928 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -214,11 +214,12 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
* Ensure that queued callbacks are all executed.
* If we detect that we are nested in a RCU read-side critical
* section, we should simply fail, otherwise we would deadlock.
+ * Note that the machinery to reliably determine whether
+ * or not we are in an RCU read-side critical section
+ * exists only in the preemptible RCU implementations
+ * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why
+ * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT.
*/
-#ifndef CONFIG_PREEMPT
- WARN_ON(1);
- return 0;
-#else
if (rcu_preempt_depth() != 0 || preempt_count() != 0 ||
irqs_disabled()) {
WARN_ON(1);
@@ -229,7 +230,6 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state)
rcu_barrier_bh();
debug_object_free(head, &rcuhead_debug_descr);
return 1;
-#endif
default:
return 0;
}
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 015abaea962a..3cb8e362e883 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -852,7 +852,7 @@ void exit_rcu(void)
if (t->rcu_read_lock_nesting == 0)
return;
t->rcu_read_lock_nesting = 1;
- rcu_read_unlock();
+ __rcu_read_unlock();
}
#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 89613f97ff26..c224da41890c 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -47,7 +47,6 @@
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
-#include <linux/sched.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and "
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
put_pid(waiter->deadlock_task_pid);
TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
- TRACE_WARN_ON(waiter->task);
memset(waiter, 0x22, sizeof(*waiter));
}
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..5c9ccd380966 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/sysdev.h>
#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
- int bkl;
int event;
struct sys_device sysdev;
};
@@ -46,9 +44,8 @@ enum test_opcodes {
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
- RTTEST_LOCKBKL, /* 9 Lock BKL */
- RTTEST_UNLOCKBKL, /* 10 Unlock BKL */
- RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
+ /* 9, 10 - reserved for BKL commemoration */
+ RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
};
@@ -74,13 +71,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[i] = 0;
}
}
-
- if (!lockwakeup && td->bkl == 4) {
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- }
return 0;
case RTTEST_RESETEVENT:
@@ -131,25 +121,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[id] = 0;
return 0;
- case RTTEST_LOCKBKL:
- if (td->bkl)
- return 0;
- td->bkl = 1;
-#ifdef CONFIG_LOCK_KERNEL
- lock_kernel();
-#endif
- td->bkl = 4;
- return 0;
-
- case RTTEST_UNLOCKBKL:
- if (td->bkl != 4)
- break;
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- return 0;
-
default:
break;
}
@@ -196,7 +167,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
break;
- case RTTEST_LOCKBKL:
default:
break;
}
@@ -229,8 +199,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
return;
- case RTTEST_LOCKBKL:
- return;
default:
return;
}
@@ -380,11 +348,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
spin_lock(&rttest_lock);
curr += sprintf(curr,
- "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:",
+ "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
- tsk->pi_blocked_on, td->bkl);
+ tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
/*
* lock->owner state tracking:
*
- * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1
- * are used to keep track of the "owner is pending" and "lock has
- * waiters" state.
+ * lock->owner holds the task_struct pointer of the owner. Bit 0
+ * is used to keep track of the "lock has waiters" state.
*
- * owner bit1 bit0
- * NULL 0 0 lock is free (fast acquire possible)
- * NULL 0 1 invalid state
- * NULL 1 0 Transitional State*
- * NULL 1 1 invalid state
- * taskpointer 0 0 lock is held (fast release possible)
- * taskpointer 0 1 task is pending owner
- * taskpointer 1 0 lock is held and has waiters
- * taskpointer 1 1 task is pending owner and lock has more waiters
- *
- * Pending ownership is assigned to the top (highest priority)
- * waiter of the lock, when the lock is released. The thread is woken
- * up and can now take the lock. Until the lock is taken (bit 0
- * cleared) a competing higher priority thread can steal the lock
- * which puts the woken up thread back on the waiters list.
+ * owner bit0
+ * NULL 0 lock is free (fast acquire possible)
+ * NULL 1 lock is free and has waiters and the top waiter
+ * is going to take the lock*
+ * taskpointer 0 lock is held (fast release possible)
+ * taskpointer 1 lock is held and has waiters**
*
* The fast atomic compare exchange based acquire and release is only
- * possible when bit 0 and 1 of lock->owner are 0.
+ * possible when bit 0 of lock->owner is 0.
+ *
+ * (*) It also can be a transitional state when grabbing the lock
+ * with ->wait_lock held. To prevent any fast path cmpxchg to the lock,
+ * we need to set bit 0 before looking at the lock, and the owner may be
+ * NULL during this small window, hence this can be a transitional state.
*
- * (*) There's a small time where the owner can be NULL and the
- * "lock has waiters" bit is set. This can happen when grabbing the lock.
- * To prevent a cmpxchg of the owner releasing the lock, we need to set this
- * bit before looking at the lock, hence the reason this is a transitional
- * state.
+ * (**) There is a small time when bit 0 is set but there are no
+ * waiters. This can happen when grabbing the lock in the slow path.
+ * To prevent a cmpxchg of the owner releasing the lock, we need to
+ * set this bit before looking at the lock.
*/
static void
-rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
- unsigned long mask)
+rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
- unsigned long val = (unsigned long)owner | mask;
+ unsigned long val = (unsigned long)owner;
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* reached or the state of the chain has changed while we
* dropped the locks.
*/
- if (!waiter || !waiter->task)
+ if (!waiter)
goto out_unlock_pi;
/*
* Check the orig_waiter state. After we dropped the locks,
- * the previous owner of the lock might have released the lock
- * and made us the pending owner:
+ * the previous owner of the lock might have released the lock.
*/
- if (orig_waiter && !orig_waiter->task)
+ if (orig_waiter && !rt_mutex_owner(orig_lock))
goto out_unlock_pi;
/*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!rt_mutex_owner(lock)) {
+ /*
+ * If the requeue above changed the top waiter, then we need
+ * to wake the new top waiter up to try to get the lock.
+ */
+
+ if (top_waiter != rt_mutex_top_waiter(lock))
+ wake_up_process(rt_mutex_top_waiter(lock)->task);
+ raw_spin_unlock(&lock->wait_lock);
+ goto out_put_task;
+ }
put_task_struct(task);
/* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
}
/*
- * Optimization: check if we can steal the lock from the
- * assigned pending owner [which might not have taken the
- * lock yet]:
- */
-static inline int try_to_steal_lock(struct rt_mutex *lock,
- struct task_struct *task)
-{
- struct task_struct *pendowner = rt_mutex_owner(lock);
- struct rt_mutex_waiter *next;
- unsigned long flags;
-
- if (!rt_mutex_owner_pending(lock))
- return 0;
-
- if (pendowner == task)
- return 1;
-
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
- if (task->prio >= pendowner->prio) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 0;
- }
-
- /*
- * Check if a waiter is enqueued on the pending owners
- * pi_waiters list. Remove it and readjust pending owners
- * priority.
- */
- if (likely(!rt_mutex_has_waiters(lock))) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 1;
- }
-
- /* No chain handling, pending owner is not blocked on anything: */
- next = rt_mutex_top_waiter(lock);
- plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
- __rt_mutex_adjust_prio(pendowner);
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- /*
- * We are going to steal the lock and a waiter was
- * enqueued on the pending owners pi_waiters queue. So
- * we have to enqueue this waiter into
- * task->pi_waiters list. This covers the case,
- * where task is boosted because it holds another
- * lock and gets unboosted because the booster is
- * interrupted, so we would delay a waiter with higher
- * priority as task->normal_prio.
- *
- * Note: in the rare case of a SCHED_OTHER task changing
- * its priority and thus stealing the lock, next->task
- * might be task:
- */
- if (likely(next->task != task)) {
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- plist_add(&next->pi_list_entry, &task->pi_waiters);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- }
- return 1;
-}
-
-/*
* Try to take an rt-mutex
*
- * This fails
- * - when the lock has a real owner
- * - when a different pending owner exists and has higher priority than current
- *
* Must be called with lock->wait_lock held.
+ *
+ * @lock: the lock to be acquired.
+ * @task: the task which wants to acquire the lock
+ * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock)
+static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
{
/*
* We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
*/
mark_rt_mutex_waiters(lock);
- if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
+ if (rt_mutex_owner(lock))
return 0;
+ /*
+ * It will get the lock because of one of these conditions:
+ * 1) there is no waiter
+ * 2) higher priority than waiters
+ * 3) it is top waiter
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
+ if (!waiter || waiter != rt_mutex_top_waiter(lock))
+ return 0;
+ }
+ }
+
+ if (waiter || rt_mutex_has_waiters(lock)) {
+ unsigned long flags;
+ struct rt_mutex_waiter *top;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ /* remove the queued waiter. */
+ if (waiter) {
+ plist_del(&waiter->list_entry, &lock->wait_list);
+ task->pi_blocked_on = NULL;
+ }
+
+ /*
+ * We have to enqueue the top waiter (if it exists) into
+ * task->pi_waiters list.
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ top = rt_mutex_top_waiter(lock);
+ top->pi_list_entry.prio = top->list_entry.prio;
+ plist_add(&top->pi_list_entry, &task->pi_waiters);
+ }
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ }
+
/* We got the lock. */
debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, current, 0);
+ rt_mutex_set_owner(lock, task);
- rt_mutex_deadlock_account_lock(lock, current);
+ rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!owner)
+ return 0;
+
if (waiter == rt_mutex_top_waiter(lock)) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
/*
* Wake up the next waiter on the lock.
*
- * Remove the top waiter from the current tasks waiter list and from
- * the lock waiter list. Set it as pending owner. Then wake it up.
+ * Remove the top waiter from the current tasks waiter list and wake it up.
*
* Called with lock->wait_lock held.
*/
static void wakeup_next_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
- struct task_struct *pendowner;
unsigned long flags;
raw_spin_lock_irqsave(&current->pi_lock, flags);
waiter = rt_mutex_top_waiter(lock);
- plist_del(&waiter->list_entry, &lock->wait_list);
/*
* Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* lock->wait_lock.
*/
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
- pendowner = waiter->task;
- waiter->task = NULL;
- rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
+ rt_mutex_set_owner(lock, NULL);
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- /*
- * Clear the pi_blocked_on variable and enqueue a possible
- * waiter into the pi_waiters list of the pending owner. This
- * prevents that in case the pending owner gets unboosted a
- * waiter with higher priority than pending-owner->normal_prio
- * is blocked on the unboosted (pending) owner.
- */
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
-
- WARN_ON(!pendowner->pi_blocked_on);
- WARN_ON(pendowner->pi_blocked_on != waiter);
- WARN_ON(pendowner->pi_blocked_on->lock != lock);
-
- pendowner->pi_blocked_on = NULL;
-
- if (rt_mutex_has_waiters(lock)) {
- struct rt_mutex_waiter *next;
-
- next = rt_mutex_top_waiter(lock);
- plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
- }
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- wake_up_process(pendowner);
+ wake_up_process(waiter->task);
}
/*
- * Remove a waiter from a lock
+ * Remove a waiter from a lock and give up
*
- * Must be called with lock->wait_lock held
+ * Must be called with lock->wait_lock held, right after
+ * try_to_take_rt_mutex() has failed.
*/
static void remove_waiter(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
- waiter->task = NULL;
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- if (first && owner != current) {
+ if (!owner)
+ return;
+
+ if (first) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
- * @detect_deadlock: passed to task_blocks_on_rt_mutex
*
* lock->wait_lock must be held by the caller.
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter,
- int detect_deadlock)
+ struct rt_mutex_waiter *waiter)
{
int ret = 0;
for (;;) {
/* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock))
+ if (try_to_take_rt_mutex(lock, current, waiter))
break;
/*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- /*
- * waiter->task is NULL the first time we come here and
- * when we have been woken up by the previous owner
- * but the lock got stolen by a higher prio task.
- */
- if (!waiter->task) {
- ret = task_blocks_on_rt_mutex(lock, waiter, current,
- detect_deadlock);
- /*
- * If we got woken up by the owner then start loop
- * all over without going into schedule to try
- * to get the lock now:
- */
- if (unlikely(!waiter->task)) {
- /*
- * Reset the return value. We might
- * have returned with -EDEADLK and the
- * owner released the lock while we
- * were walking the pi chain.
- */
- ret = 0;
- continue;
- }
- if (unlikely(ret))
- break;
- }
-
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
- if (waiter->task)
- schedule_rt_mutex(lock);
+ schedule_rt_mutex(lock);
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
- waiter.task = NULL;
raw_spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock)) {
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
timeout->task = NULL;
}
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
- detect_deadlock);
+ ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
+
+ if (likely(!ret))
+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter.task))
+ if (unlikely(ret))
remove_waiter(lock, &waiter);
/*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
if (unlikely(timeout))
hrtimer_cancel(&timeout->timer);
- /*
- * Readjust priority, when we did not get the lock. We might
- * have been the pending owner and boosted. Since we did not
- * take the lock, the PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
debug_rt_mutex_free_waiter(&waiter);
return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
if (likely(rt_mutex_owner(lock) != current)) {
- ret = try_to_take_rt_mutex(lock);
+ ret = try_to_take_rt_mutex(lock, current, NULL);
/*
* try_to_take_rt_mutex() sets the lock waiters
* bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
{
__rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
- rt_mutex_set_owner(lock, proxy_owner, 0);
+ rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL, 0);
+ rt_mutex_set_owner(lock, NULL);
rt_mutex_deadlock_account_unlock(proxy_owner);
}
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
raw_spin_lock(&lock->wait_lock);
- mark_rt_mutex_waiters(lock);
-
- if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
- /* We got the lock for task. */
- debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, task, 0);
+ if (try_to_take_rt_mutex(lock, task, NULL)) {
raw_spin_unlock(&lock->wait_lock);
- rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
- if (ret && !waiter->task) {
+ if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
*/
ret = 0;
}
+
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
+
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
- detect_deadlock);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter->task))
+ if (unlikely(ret))
remove_waiter(lock, waiter);
/*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
raw_spin_unlock(&lock->wait_lock);
- /*
- * Readjust priority, when we did not get the lock. We might have been
- * the pending owner and boosted. Since we did not take the lock, the
- * PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
return ret;
}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
/*
* lock->owner state tracking:
*/
-#define RT_MUTEX_OWNER_PENDING 1UL
-#define RT_MUTEX_HAS_WAITERS 2UL
-#define RT_MUTEX_OWNER_MASKALL 3UL
+#define RT_MUTEX_HAS_WAITERS 1UL
+#define RT_MUTEX_OWNER_MASKALL 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
}
-static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
-{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
-static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
-{
- return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
-}
-
/*
* PI-futex support (proxy locking functions, etc.):
*/
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..c8e40b7005c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -324,7 +324,7 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
- struct sched_entity *curr, *next, *last;
+ struct sched_entity *curr, *next, *last, *skip;
unsigned int nr_spread_over;
@@ -606,9 +606,6 @@ static inline struct task_group *task_group(struct task_struct *p)
struct task_group *tg;
struct cgroup_subsys_state *css;
- if (p->flags & PF_EXITING)
- return &root_task_group;
-
css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
lockdep_is_held(&task_rq(p)->lock));
tg = container_of(css, struct task_group, css);
@@ -1686,6 +1683,39 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__release(rq2->lock);
}
+#else /* CONFIG_SMP */
+
+/*
+ * double_rq_lock - safely lock two runqueues
+ *
+ * Note this does not disable interrupts like task_rq_lock,
+ * you need to do so manually before calling.
+ */
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
+ __acquires(rq1->lock)
+ __acquires(rq2->lock)
+{
+ BUG_ON(!irqs_disabled());
+ BUG_ON(rq1 != rq2);
+ raw_spin_lock(&rq1->lock);
+ __acquire(rq2->lock); /* Fake it out ;) */
+}
+
+/*
+ * double_rq_unlock - safely unlock two runqueues
+ *
+ * Note this does not restore interrupts like task_rq_unlock,
+ * you need to do so manually after calling.
+ */
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ __releases(rq1->lock)
+ __releases(rq2->lock)
+{
+ BUG_ON(rq1 != rq2);
+ raw_spin_unlock(&rq1->lock);
+ __release(rq2->lock);
+}
+
#endif
static void calc_load_account_idle(struct rq *this_rq);
@@ -1880,7 +1910,7 @@ void account_system_vtime(struct task_struct *curr)
*/
if (hardirq_count())
__this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
irq_time_write_end();
@@ -1920,8 +1950,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
sched_rt_avg_update(rq, irq_delta);
}
+static int irqtime_account_hi_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_hardirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int irqtime_account_si_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_softirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+#define sched_clock_irqtime (0)
+
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
rq->clock_task += delta;
@@ -2025,14 +2087,14 @@ inline int task_curr(const struct task_struct *p)
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
- int oldprio, int running)
+ int oldprio)
{
if (prev_class != p->sched_class) {
if (prev_class->switched_from)
- prev_class->switched_from(rq, p, running);
- p->sched_class->switched_to(rq, p, running);
- } else
- p->sched_class->prio_changed(rq, p, oldprio, running);
+ prev_class->switched_from(rq, p);
+ p->sched_class->switched_to(rq, p);
+ } else if (oldprio != p->prio)
+ p->sched_class->prio_changed(rq, p, oldprio);
}
static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2224,7 +2286,10 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* yield - it could be a while.
*/
if (unlikely(on_rq)) {
- schedule_timeout_uninterruptible(1);
+ ktime_t to = ktime_set(0, NSEC_PER_SEC/HZ);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL);
continue;
}
@@ -2265,27 +2330,6 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);
#endif /* CONFIG_SMP */
-/**
- * task_oncpu_function_call - call a function on the cpu on which a task runs
- * @p: the task to evaluate
- * @func: the function to be called
- * @info: the function call argument
- *
- * Calls the function @func when the task is currently running. This might
- * be on the current CPU, which just calls the function directly
- */
-void task_oncpu_function_call(struct task_struct *p,
- void (*func) (void *info), void *info)
-{
- int cpu;
-
- preempt_disable();
- cpu = task_cpu(p);
- if (task_curr(p))
- smp_call_function_single(cpu, func, info, 1);
- preempt_enable();
-}
-
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -2566,6 +2610,7 @@ static void __sched_fork(struct task_struct *p)
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
+ p->se.vruntime = 0;
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -2776,9 +2821,12 @@ static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
+ sched_info_switch(prev, next);
+ perf_event_task_sched_out(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
+ trace_sched_switch(prev, next);
}
/**
@@ -2911,7 +2959,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_sched_switch(prev, next);
+
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -3568,6 +3616,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
}
/*
+ * Account system cpu time to a process and desired cpustat field
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
+ * @target_cputime64: pointer to cpustat field that has to be updated
+ */
+static inline
+void __account_system_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled, cputime64_t *target_cputime64)
+{
+ cputime64_t tmp = cputime_to_cputime64(cputime);
+
+ /* Add system time to process. */
+ p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+ account_group_system_time(p, cputime);
+
+ /* Add system time to cpustat. */
+ *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
+ /* Account for system time used */
+ acct_update_integrals(p);
+}
+
+/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3578,36 +3652,26 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- cputime64_t tmp;
+ cputime64_t *target_cputime64;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
return;
}
- /* Add system time to process. */
- p->stime = cputime_add(p->stime, cputime);
- p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
- account_group_system_time(p, cputime);
-
- /* Add system time to cpustat. */
- tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
- cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ target_cputime64 = &cpustat->irq;
else if (in_serving_softirq())
- cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ target_cputime64 = &cpustat->softirq;
else
- cpustat->system = cputime64_add(cpustat->system, tmp);
+ target_cputime64 = &cpustat->system;
- cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
-
- /* Account for system time used */
- acct_update_integrals(p);
+ __account_system_time(p, cputime, cputime_scaled, target_cputime64);
}
/*
* Account for involuntary wait time.
- * @steal: the cpu time spent in involuntary wait
+ * @cputime: the cpu time spent in involuntary wait
*/
void account_steal_time(cputime_t cputime)
{
@@ -3635,6 +3699,73 @@ void account_idle_time(cputime_t cputime)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * Account a tick to a process and cpustat
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: is the tick from userspace
+ * @rq: the pointer to rq
+ *
+ * Tick demultiplexing follows the order
+ * - pending hardirq update
+ * - pending softirq update
+ * - user_time
+ * - idle_time
+ * - system time
+ * - check for guest_time
+ * - else account as system_time
+ *
+ * Check for hardirq is done both for system and user time as there is
+ * no timer going off while we are on hardirq and hence we may never get an
+ * opportunity to update it solely in system time.
+ * p->stime and friends are only updated on system time and not on irq or
+ * softirq time, as those no longer count in the task's exec_runtime.
+ */
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq)
+{
+ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+ if (irqtime_account_hi_update()) {
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ } else if (irqtime_account_si_update()) {
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ } else if (this_cpu_ksoftirqd() == p) {
+ /*
+ * ksoftirqd time does not get accounted in cpu_softirq_time.
+ * So, we have to handle it separately here.
+ * Also, p->stime needs to be updated for ksoftirqd.
+ */
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->softirq);
+ } else if (user_tick) {
+ account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else if (p == rq->idle) {
+ account_idle_time(cputime_one_jiffy);
+ } else if (p->flags & PF_VCPU) { /* System time or guest time */
+ account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else {
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->system);
+ }
+}
+
+static void irqtime_account_idle_ticks(int ticks)
+{
+ int i;
+ struct rq *rq = this_rq();
+
+ for (i = 0; i < ticks; i++)
+ irqtime_account_process_tick(current, 0, rq);
+}
+#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+static void irqtime_account_idle_ticks(int ticks) {}
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq) {}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
@@ -3645,6 +3776,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (sched_clock_irqtime) {
+ irqtime_account_process_tick(p, user_tick, rq);
+ return;
+ }
+
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3670,6 +3806,12 @@ void account_steal_ticks(unsigned long ticks)
*/
void account_idle_ticks(unsigned long ticks)
{
+
+ if (sched_clock_irqtime) {
+ irqtime_account_idle_ticks(ticks);
+ return;
+ }
+
account_idle_time(jiffies_to_cputime(ticks));
}
@@ -3989,9 +4131,6 @@ need_resched_nonpreemptible:
rq->skip_clock_update = 0;
if (likely(prev != next)) {
- sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
-
rq->nr_switches++;
rq->curr = next;
++*switch_count;
@@ -4213,6 +4352,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
{
__wake_up_common(q, mode, 1, 0, key);
}
+EXPORT_SYMBOL_GPL(__wake_up_locked_key);
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
@@ -4570,11 +4710,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, &flags);
}
@@ -4822,12 +4961,15 @@ recheck:
param->sched_priority > rlim_rtprio)
return -EPERM;
}
+
/*
- * Like positive nice levels, dont allow tasks to
- * move out of SCHED_IDLE either:
+ * Treat SCHED_IDLE as nice 20. Only allow a switch to
+ * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
*/
- if (p->policy == SCHED_IDLE && policy != SCHED_IDLE)
- return -EPERM;
+ if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) {
+ if (!can_nice(p, TASK_NICE(p)))
+ return -EPERM;
+ }
/* can't change other user's priorities */
if (!check_same_owner(p))
@@ -4902,11 +5044,10 @@ recheck:
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
activate_task(rq, p, 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -5323,6 +5464,65 @@ void __sched yield(void)
}
EXPORT_SYMBOL(yield);
+/**
+ * yield_to - yield the current processor to another thread in
+ * your thread group, or accelerate that thread toward the
+ * processor it's on.
+ *
+ * It's the caller's job to ensure that the target task struct
+ * can't go away on us before we can do any checks.
+ *
+ * Returns true if we indeed boosted the target task.
+ */
+bool __sched yield_to(struct task_struct *p, bool preempt)
+{
+ struct task_struct *curr = current;
+ struct rq *rq, *p_rq;
+ unsigned long flags;
+ bool yielded = 0;
+
+ local_irq_save(flags);
+ rq = this_rq();
+
+again:
+ p_rq = task_rq(p);
+ double_rq_lock(rq, p_rq);
+ while (task_rq(p) != p_rq) {
+ double_rq_unlock(rq, p_rq);
+ goto again;
+ }
+
+ if (!curr->sched_class->yield_to_task)
+ goto out;
+
+ if (curr->sched_class != p->sched_class)
+ goto out;
+
+ if (task_running(p_rq, p) || p->state)
+ goto out;
+
+ yielded = curr->sched_class->yield_to_task(rq, p, preempt);
+ if (yielded) {
+ schedstat_inc(rq, yld_count);
+ /*
+ * Make p's CPU reschedule; pick_next_entity takes care of
+ * fairness.
+ */
+ if (preempt && rq != p_rq)
+ resched_task(p_rq->curr);
+ }
+
+out:
+ double_rq_unlock(rq, p_rq);
+ local_irq_restore(flags);
+
+ if (yielded)
+ schedule();
+
+ return yielded;
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
/*
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
@@ -5571,7 +5771,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
- ftrace_graph_init_task(idle);
+ ftrace_graph_init_idle_task(idle, cpu);
}
/*
@@ -7796,6 +7996,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
INIT_LIST_HEAD(&cfs_rq->tasks);
#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq->rq = rq;
+ /* allow initial update_cfs_load() to truncate */
+#ifdef CONFIG_SMP
+ cfs_rq->load_stamp = 1;
+#endif
#endif
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
}
@@ -8109,6 +8313,8 @@ EXPORT_SYMBOL(__might_sleep);
#ifdef CONFIG_MAGIC_SYSRQ
static void normalize_task(struct rq *rq, struct task_struct *p)
{
+ const struct sched_class *prev_class = p->sched_class;
+ int old_prio = p->prio;
int on_rq;
on_rq = p->se.on_rq;
@@ -8119,6 +8325,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
activate_task(rq, p, 0);
resched_task(rq->curr);
}
+
+ check_class_changed(rq, p, prev_class, old_prio);
}
void normalize_rt_tasks(void)
@@ -8510,7 +8718,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
/* Propagate contribution to hierarchy */
raw_spin_lock_irqsave(&rq->lock, flags);
for_each_sched_entity(se)
- update_cfs_shares(group_cfs_rq(se), 0);
+ update_cfs_shares(group_cfs_rq(se));
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -8884,7 +9092,8 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
}
static void
-cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
+cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
+ struct cgroup *old_cgrp, struct task_struct *task)
{
/*
* cgroup_exit() is called in the copy_process() failure path.
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 9fb656283157..5946ac515602 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -12,7 +12,6 @@ static atomic_t autogroup_seq_nr;
static void __init autogroup_init(struct task_struct *init_task)
{
autogroup_default.tg = &root_task_group;
- root_task_group.autogroup = &autogroup_default;
kref_init(&autogroup_default.kref);
init_rwsem(&autogroup_default.lock);
init_task->signal->autogroup = &autogroup_default;
@@ -130,7 +129,7 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
static inline bool task_group_is_autogroup(struct task_group *tg)
{
- return tg != &root_task_group && tg->autogroup;
+ return !!tg->autogroup;
}
static inline struct task_group *
@@ -161,11 +160,15 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
p->signal->autogroup = autogroup_kref_get(ag);
+ if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
+ goto out;
+
t = p;
do {
sched_move_task(t);
} while_each_thread(p, t);
+out:
unlock_task_sighand(p, &flags);
autogroup_kref_put(prev);
}
@@ -247,10 +250,14 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
struct autogroup *ag = autogroup_task_get(p);
+ if (!task_group_is_autogroup(ag->tg))
+ goto out;
+
down_read(&ag->lock);
seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
up_read(&ag->lock);
+out:
autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */
@@ -258,9 +265,7 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
- int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
-
- if (!enabled || !tg->autogroup)
+ if (!task_group_is_autogroup(tg))
return 0;
return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 7b859ffe5dad..05577055cfca 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -1,6 +1,11 @@
#ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup {
+ /*
+ * The reference count is not the number of threads currently attached
+ * to this autogroup. It is the number of tasks that could use this
+ * autogroup.
+ */
struct kref kref;
struct task_group *tg;
struct rw_semaphore lock;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index eb6cb8edd075..7bacd83a4158 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -179,7 +179,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
raw_spin_lock_irqsave(&rq->lock, flags);
if (cfs_rq->rb_leftmost)
- MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
+ MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..3f7ec9e27ee1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8;
unsigned int sysctl_sched_child_runs_first __read_mostly;
/*
- * sys_sched_yield() compat mode
- *
- * This option switches the agressive yield implementation of the
- * old scheduler back on.
- */
-unsigned int __read_mostly sysctl_sched_compat_yield;
-
-/*
* SCHED_OTHER wake-up granularity.
* (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
*
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}
-static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *left = cfs_rq->rb_leftmost;
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
return rb_entry(left, struct sched_entity, run_node);
}
+static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+{
+ struct rb_node *next = rb_next(&se->run_node);
+
+ if (!next)
+ return NULL;
+
+ return rb_entry(next, struct sched_entity, run_node);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
* Scheduling class statistics methods:
*/
-#ifdef CONFIG_SCHED_DEBUG
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
+static void update_cfs_shares(struct cfs_rq *cfs_rq);
/*
* Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
now - cfs_rq->load_last > 4 * period) {
cfs_rq->load_period = 0;
cfs_rq->load_avg = 0;
+ delta = period - 1;
}
cfs_rq->load_stamp = now;
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
list_del_leaf_cfs_rq(cfs_rq);
}
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
long load_weight, load, shares;
- load = cfs_rq->load.weight + weight_delta;
+ load = cfs_rq->load.weight;
load_weight = atomic_read(&tg->load_weight);
- load_weight -= cfs_rq->load_contribution;
load_weight += load;
+ load_weight -= cfs_rq->load_contribution;
shares = (tg->shares * load);
if (load_weight)
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
}
# else /* CONFIG_SMP */
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
return tg->shares;
}
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
account_entity_enqueue(cfs_rq, se);
}
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static void update_cfs_shares(struct cfs_rq *cfs_rq)
{
struct task_group *tg;
struct sched_entity *se;
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
if (likely(se->load.weight == tg->shares))
return;
#endif
- shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
+ shares = calc_cfs_shares(cfs_rq, tg);
reweight_entity(cfs_rq_of(se), se, shares);
}
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
{
}
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, se->load.weight);
account_entity_enqueue(cfs_rq, se);
+ update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
list_add_leaf_cfs_rq(cfs_rq);
}
-static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __clear_buddies_last(struct sched_entity *se)
+{
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->last == se)
+ cfs_rq->last = NULL;
+ else
+ break;
+ }
+}
+
+static void __clear_buddies_next(struct sched_entity *se)
{
- if (!se || cfs_rq->last == se)
- cfs_rq->last = NULL;
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->next == se)
+ cfs_rq->next = NULL;
+ else
+ break;
+ }
+}
- if (!se || cfs_rq->next == se)
- cfs_rq->next = NULL;
+static void __clear_buddies_skip(struct sched_entity *se)
+{
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ if (cfs_rq->skip == se)
+ cfs_rq->skip = NULL;
+ else
+ break;
+ }
}
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- for_each_sched_entity(se)
- __clear_buddies(cfs_rq_of(se), se);
+ if (cfs_rq->last == se)
+ __clear_buddies_last(se);
+
+ if (cfs_rq->next == se)
+ __clear_buddies_next(se);
+
+ if (cfs_rq->skip == se)
+ __clear_buddies_skip(se);
}
static void
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
/*
* Normalize the entity after updating the min_vruntime because the
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
return;
if (cfs_rq->nr_running > 1) {
- struct sched_entity *se = __pick_next_entity(cfs_rq);
+ struct sched_entity *se = __pick_first_entity(cfs_rq);
s64 delta = curr->vruntime - se->vruntime;
if (delta < 0)
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+/*
+ * Pick the next process, keeping these things in mind, in this order:
+ * 1) keep things fair between processes/task groups
+ * 2) pick the "next" process, since someone really wants that to run
+ * 3) pick the "last" process, for cache locality
+ * 4) do not run the "skip" process, if something else is available
+ */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
- struct sched_entity *se = __pick_next_entity(cfs_rq);
+ struct sched_entity *se = __pick_first_entity(cfs_rq);
struct sched_entity *left = se;
- if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
- se = cfs_rq->next;
+ /*
+ * Avoid running the skip buddy, if running something else can
+ * be done without getting too unfair.
+ */
+ if (cfs_rq->skip == se) {
+ struct sched_entity *second = __pick_next_entity(se);
+ if (second && wakeup_preempt_entity(second, left) < 1)
+ se = second;
+ }
/*
* Prefer last buddy, try to return the CPU to a preempted task.
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
se = cfs_rq->last;
+ /*
+ * Someone really wants this to run. If it's not unfair, run it.
+ */
+ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+ se = cfs_rq->next;
+
clear_buddies(cfs_rq, se);
return se;
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
}
-/*
- * sched_yield() support is very simple - we dequeue and enqueue.
- *
- * If compat_yield is turned on then we requeue to the end of the tree.
- */
-static void yield_task_fair(struct rq *rq)
-{
- struct task_struct *curr = rq->curr;
- struct cfs_rq *cfs_rq = task_cfs_rq(curr);
- struct sched_entity *rightmost, *se = &curr->se;
-
- /*
- * Are we the only task in the tree?
- */
- if (unlikely(cfs_rq->nr_running == 1))
- return;
-
- clear_buddies(cfs_rq, se);
-
- if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
- update_rq_clock(rq);
- /*
- * Update run-time statistics of the 'current'.
- */
- update_curr(cfs_rq);
-
- return;
- }
- /*
- * Find the rightmost entry in the rbtree:
- */
- rightmost = __pick_last_entity(cfs_rq);
- /*
- * Already in the rightmost position?
- */
- if (unlikely(!rightmost || entity_before(rightmost, se)))
- return;
-
- /*
- * Minimally necessary key value to be last in the tree:
- * Upon rescheduling, sched_class::put_prev_task() will place
- * 'current' within the tree based on its new key value.
- */
- se->vruntime = rightmost->vruntime + 1;
-}
-
#ifdef CONFIG_SMP
static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
}
}
+static void set_skip_buddy(struct sched_entity *se)
+{
+ if (likely(task_of(se)->policy != SCHED_IDLE)) {
+ for_each_sched_entity(se)
+ cfs_rq_of(se)->skip = se;
+ }
+}
+
/*
* Preempt the current task with a newly woken task if needed:
*/
@@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
if (test_tsk_need_resched(curr))
return;
+ /* Idle tasks are by definition preempted by non-idle tasks. */
+ if (unlikely(curr->policy == SCHED_IDLE) &&
+ likely(p->policy != SCHED_IDLE))
+ goto preempt;
+
/*
- * Batch and idle tasks do not preempt (their preemption is driven by
- * the tick):
+ * Batch and idle tasks do not preempt non-idle tasks (their preemption
+ * is driven by the tick):
*/
if (unlikely(p->policy != SCHED_NORMAL))
return;
- /* Idle tasks are by definition preempted by everybody. */
- if (unlikely(curr->policy == SCHED_IDLE))
- goto preempt;
if (!sched_feat(WAKEUP_PREEMPT))
return;
@@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
}
}
+/*
+ * sched_yield() is very simple
+ *
+ * The magic of dealing with the ->skip buddy is in pick_next_entity.
+ */
+static void yield_task_fair(struct rq *rq)
+{
+ struct task_struct *curr = rq->curr;
+ struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+ struct sched_entity *se = &curr->se;
+
+ /*
+ * Are we the only task in the tree?
+ */
+ if (unlikely(rq->nr_running == 1))
+ return;
+
+ clear_buddies(cfs_rq, se);
+
+ if (curr->policy != SCHED_BATCH) {
+ update_rq_clock(rq);
+ /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ }
+
+ set_skip_buddy(se);
+}
+
+static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt)
+{
+ struct sched_entity *se = &p->se;
+
+ if (!se->on_rq)
+ return false;
+
+ /* Tell the scheduler that we'd really like p to run next. */
+ set_next_buddy(se);
+
+ yield_task_fair(rq);
+
+ return true;
+}
+
#ifdef CONFIG_SMP
/**************************************************
* Fair scheduling class load-balancing methods:
@@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
* We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
* @load_idx: Load index of sched_domain of this_cpu for load calc.
- * @sd_idle: Idle status of the sched_domain containing group.
* @local_group: Does group contain this_cpu.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
@@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
*/
static inline void update_sg_lb_stats(struct sched_domain *sd,
struct sched_group *group, int this_cpu,
- enum cpu_idle_type idle, int load_idx, int *sd_idle,
+ enum cpu_idle_type idle, int load_idx,
int local_group, const struct cpumask *cpus,
int *balance, struct sg_lb_stats *sgs)
{
@@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
for_each_cpu_and(i, sched_group_cpus(group), cpus) {
struct rq *rq = cpu_rq(i);
- if (*sd_idle && rq->nr_running)
- *sd_idle = 0;
-
/* Bias balancing toward cpus of our domain */
if (local_group) {
if (idle_cpu(i) && !first_idle_cpu) {
@@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
/*
* Consider the group unbalanced when the imbalance is larger
- * than the average weight of two tasks.
+ * than the average weight of a task.
*
* APZ: with cgroup the avg task weight can vary wildly and
* might not be a suitable number - should we keep a
@@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
if (sgs->sum_nr_running)
avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1)
+ if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
sgs->group_imb = 1;
sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
@@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd,
* @sd: sched_domain whose statistics are to be updated.
* @this_cpu: Cpu for which load balance is currently performed.
* @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing sg.
* @cpus: Set of cpus considered for load balancing.
* @balance: Should we balance.
* @sds: variable to hold the statistics for this sched_domain.
*/
static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
- enum cpu_idle_type idle, int *sd_idle,
- const struct cpumask *cpus, int *balance,
- struct sd_lb_stats *sds)
+ enum cpu_idle_type idle, const struct cpumask *cpus,
+ int *balance, struct sd_lb_stats *sds)
{
struct sched_domain *child = sd->child;
struct sched_group *sg = sd->groups;
@@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
memset(&sgs, 0, sizeof(sgs));
- update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
+ update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx,
local_group, cpus, balance, &sgs);
if (local_group && !(*balance))
@@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
* @imbalance: Variable which stores amount of weighted load which should
* be moved to restore balance/put a group to idle.
* @idle: The idle status of this_cpu.
- * @sd_idle: The idleness of sd
* @cpus: The set of CPUs under consideration for load-balancing.
* @balance: Pointer to a variable indicating if this_cpu
* is the appropriate cpu to perform load balancing at this_level.
@@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
unsigned long *imbalance, enum cpu_idle_type idle,
- int *sd_idle, const struct cpumask *cpus, int *balance)
+ const struct cpumask *cpus, int *balance)
{
struct sd_lb_stats sds;
@@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* Compute the various statistics relavent for load balancing at
* this level.
*/
- update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus,
- balance, &sds);
-
- /* Cases where imbalance does not exist from POV of this_cpu */
- /* 1) this_cpu is not the appropriate cpu to perform load balancing
- * at this level.
- * 2) There is no busy sibling group to pull from.
- * 3) This group is the busiest group.
- * 4) This group is more busy than the avg busieness at this
- * sched_domain.
- * 5) The imbalance is within the specified limit.
- *
- * Note: when doing newidle balance, if the local group has excess
- * capacity (i.e. nr_running < group_capacity) and the busiest group
- * does not have any capacity, we force a load balance to pull tasks
- * to the local group. In this case, we skip past checks 3, 4 and 5.
+ update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds);
+
+ /*
+ * this_cpu is not the appropriate cpu to perform load balancing at
+ * this level.
*/
if (!(*balance))
goto ret;
@@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
check_asym_packing(sd, &sds, this_cpu, imbalance))
return sds.busiest;
+ /* There is no busy sibling group to pull tasks from */
if (!sds.busiest || sds.busiest_nr_running == 0)
goto out_balanced;
- /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+ /*
+ * If the busiest group is imbalanced the below checks don't
+ * work because they assume all things are equal, which typically
+ * isn't true due to cpus_allowed constraints and the like.
+ */
+ if (sds.group_imb)
+ goto force_balance;
+
+ /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity &&
!sds.busiest_has_capacity)
goto force_balance;
+ /*
+ * If the local group is more busy than the selected busiest group
+ * don't try and pull any tasks.
+ */
if (sds.this_load >= sds.max_load)
goto out_balanced;
+ /*
+ * Don't pull any tasks if this group is already above the domain
+ * average load.
+ */
sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
-
if (sds.this_load >= sds.avg_load)
goto out_balanced;
- /*
- * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative.
- * And to check for busy balance use !idle_cpu instead of
- * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE
- * even when they are idle.
- */
- if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) {
- if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
- goto out_balanced;
- } else {
+ if (idle == CPU_IDLE) {
/*
* This cpu is idle. If the busiest group load doesn't
* have more tasks than the number of available cpu's and
* there is no imbalance between this and busiest group
* wrt to idle cpu's, it is balanced.
*/
- if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
+ if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) &&
sds.busiest_nr_running <= sds.busiest_group_weight)
goto out_balanced;
+ } else {
+ /*
+ * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use
+ * imbalance_pct to be conservative.
+ */
+ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
+ goto out_balanced;
}
force_balance:
@@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+static int need_active_balance(struct sched_domain *sd, int idle,
int busiest_cpu, int this_cpu)
{
if (idle == CPU_NEWLY_IDLE) {
@@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
* move_tasks() will succeed. ld_moved will be true and this
* active balance code will not be triggered.
*/
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- return 0;
-
if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP)
return 0;
}
@@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
int *balance)
{
- int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+ int ld_moved, all_pinned = 0, active_balance = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
@@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
cpumask_copy(cpus, cpu_active_mask);
- /*
- * When power savings policy is enabled for the parent domain, idle
- * sibling can pick up load irrespective of busy siblings. In this case,
- * let the state of idle sibling percolate up as CPU_IDLE, instead of
- * portraying it as CPU_NOT_IDLE.
- */
- if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- sd_idle = 1;
-
schedstat_inc(sd, lb_count[idle]);
redo:
- group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle,
+ group = find_busiest_group(sd, this_cpu, &imbalance, idle,
cpus, balance);
if (*balance == 0)
@@ -3330,8 +3372,7 @@ redo:
if (idle != CPU_NEWLY_IDLE)
sd->nr_balance_failed++;
- if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
- this_cpu)) {
+ if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) {
raw_spin_lock_irqsave(&busiest->lock, flags);
/* don't kick the active_load_balance_cpu_stop,
@@ -3386,10 +3427,6 @@ redo:
sd->balance_interval *= 2;
}
- if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- ld_moved = -1;
-
goto out;
out_balanced:
@@ -3403,11 +3440,7 @@ out_one_pinned:
(sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2;
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
- !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
- ld_moved = -1;
- else
- ld_moved = 0;
+ ld_moved = 0;
out:
return ld_moved;
}
@@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
if (load_balance(cpu, rq, sd, idle, &balance)) {
/*
* We've pulled tasks over so either we're no
- * longer idle, or one of our SMT siblings is
- * not idle.
+ * longer idle.
*/
idle = CPU_NOT_IDLE;
}
@@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p)
* Priority of the task has changed. Check to see if we preempt
* the current task.
*/
-static void prio_changed_fair(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
* this runqueue and our priority is higher than the current's
*/
- if (running) {
+ if (rq->curr == p) {
if (p->prio > oldprio)
resched_task(rq->curr);
} else
check_preempt_curr(rq, p, 0);
}
+static void switched_from_fair(struct rq *rq, struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ /*
+ * Ensure the task's vruntime is normalized, so that when it's
+ * switched back to the fair class the enqueue_entity(.flags=0) will
+ * do the right thing.
+ *
+ * If it was on_rq, then the dequeue_entity(.flags=0) will already
+ * have normalized the vruntime, if it was !on_rq, then only when
+ * the task is sleeping will it still have non-normalized vruntime.
+ */
+ if (!se->on_rq && p->state != TASK_RUNNING) {
+ /*
+ * Fix up our vruntime so that the current sleep doesn't
+ * cause 'unlimited' sleep bonus.
+ */
+ place_entity(cfs_rq, se, 0);
+ se->vruntime -= cfs_rq->min_vruntime;
+ }
+}
+
/*
* We switched to the sched_fair class.
*/
-static void switched_to_fair(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* We were most likely switched from sched_rt, so
* kick off the schedule if running, otherwise just see
* if we can still preempt the current task.
*/
- if (running)
+ if (rq->curr == p)
resched_task(rq->curr);
else
check_preempt_curr(rq, p, 0);
@@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = {
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
+ .yield_to_task = yield_to_task_fair,
.check_preempt_curr = check_preempt_wakeup,
@@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = {
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
+ .switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
{
}
-static void switched_to_idle(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_idle(struct rq *rq, struct task_struct *p)
{
- /* Can this actually happen?? */
- if (running)
- resched_task(rq->curr);
- else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
-static void prio_changed_idle(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
{
- /* This can happen for hot plug CPUS */
-
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..db308cb08b75 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
- int this_cpu = smp_processor_id();
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
struct sched_rt_entity *rt_se;
- rt_se = rt_rq->tg->rt_se[this_cpu];
+ int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+ rt_se = rt_rq->tg->rt_se[cpu];
if (rt_rq->rt_nr_running) {
if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- int this_cpu = smp_processor_id();
struct sched_rt_entity *rt_se;
+ int cpu = cpu_of(rq_of_rt_rq(rt_rq));
- rt_se = rt_rq->tg->rt_se[this_cpu];
+ rt_se = rt_rq->tg->rt_se[cpu];
if (rt_se && on_rt_rq(rt_se))
dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
raw_spin_unlock(&rt_rq->rt_runtime_lock);
- } else if (rt_rq->rt_nr_running)
+ } else if (rt_rq->rt_nr_running) {
idle = 0;
+ if (!rt_rq_throttled(rt_rq))
+ enqueue = 1;
+ }
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
@@ -1595,8 +1599,7 @@ static void rq_offline_rt(struct rq *rq)
* When switch from the rt queue, we bring ourselves to a position
* that we might want to pull RT tasks from other runqueues.
*/
-static void switched_from_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
/*
* If there are other RT tasks then we will reschedule
@@ -1605,7 +1608,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!rq->rt.rt_nr_running)
+ if (p->se.on_rq && !rq->rt.rt_nr_running)
pull_rt_task(rq);
}
@@ -1624,8 +1627,7 @@ static inline void init_sched_rt_class(void)
* with RT tasks. In this case we try to push them off to
* other runqueues.
*/
-static void switched_to_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
@@ -1636,7 +1638,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* If that current running task is also an RT task
* then see if we can move to another run queue.
*/
- if (!running) {
+ if (p->se.on_rq && rq->curr != p) {
#ifdef CONFIG_SMP
if (rq->rt.overloaded && push_rt_task(rq) &&
/* Don't resched if we changed runqueues */
@@ -1652,10 +1654,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* Priority of the task has changed. This may cause
* us to initiate a push or pull.
*/
-static void prio_changed_rt(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
- if (running) {
+ if (!p->se.on_rq)
+ return;
+
+ if (rq->curr == p) {
#ifdef CONFIG_SMP
/*
* If our priority decreases while running, we
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
{
}
-static void switched_to_stop(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_stop(struct rq *rq, struct task_struct *p)
{
BUG(); /* its impossible to change to this class */
}
-static void prio_changed_stop(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
{
BUG(); /* how!?, what priority? */
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..56e5dec837f0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -311,9 +311,21 @@ void irq_enter(void)
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
-# define invoke_softirq() __do_softirq()
+static inline void invoke_softirq(void)
+{
+ if (!force_irqthreads)
+ __do_softirq();
+ else
+ wakeup_softirqd();
+}
#else
-# define invoke_softirq() do_softirq()
+static inline void invoke_softirq(void)
+{
+ if (!force_irqthreads)
+ do_softirq();
+ else
+ wakeup_softirqd();
+}
#endif
/*
@@ -721,7 +733,6 @@ static int run_ksoftirqd(void * __bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
- current->flags |= PF_KSOFTIRQD;
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
@@ -738,7 +749,10 @@ static int run_ksoftirqd(void * __bind_cpu)
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
- do_softirq();
+ local_irq_disable();
+ if (local_softirq_pending())
+ __do_softirq();
+ local_irq_enable();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
diff --git a/kernel/sys.c b/kernel/sys.c
index 18da702ec813..1ad48b3b9068 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -37,6 +37,7 @@
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/gfp.h>
+#include <linux/syscore_ops.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -298,6 +299,7 @@ void kernel_restart_prepare(char *cmd)
system_state = SYSTEM_RESTART;
device_shutdown();
sysdev_shutdown();
+ syscore_shutdown();
}
/**
@@ -336,6 +338,7 @@ void kernel_halt(void)
{
kernel_shutdown_prepare(SYSTEM_HALT);
sysdev_shutdown();
+ syscore_shutdown();
printk(KERN_EMERG "System halted.\n");
kmsg_dump(KMSG_DUMP_HALT);
machine_halt();
@@ -355,6 +358,7 @@ void kernel_power_off(void)
pm_power_off_prepare();
disable_nonboot_cpus();
sysdev_shutdown();
+ syscore_shutdown();
printk(KERN_EMERG "Power down.\n");
kmsg_dump(KMSG_DUMP_POWEROFF);
machine_power_off();
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
/* fanotify! */
cond_syscall(sys_fanotify_init);
cond_syscall(sys_fanotify_mark);
+
+/* open by handle */
+cond_syscall(sys_name_to_handle_at);
+cond_syscall(sys_open_by_handle_at);
+cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..40245d697602 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
static struct ctl_table root_table[];
static struct ctl_table_root sysctl_table_root;
static struct ctl_table_header root_table_header = {
- .count = 1,
+ {{.count = 1,
.ctl_table = root_table,
- .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+ .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
.root = &sysctl_table_root,
.set = &sysctl_table_root.default_set,
};
@@ -361,20 +361,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_rt_handler,
},
- {
- .procname = "sched_compat_yield",
- .data = &sysctl_sched_compat_yield,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
#ifdef CONFIG_SCHED_AUTOGROUP
{
.procname = "sched_autogroup_enabled",
.data = &sysctl_sched_autogroup_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
@@ -948,7 +941,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_perf_event_sample_rate,
.maxlen = sizeof(sysctl_perf_event_sample_rate),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = perf_proc_update_handler,
},
#endif
#ifdef CONFIG_KMEMCHECK
@@ -1567,11 +1560,16 @@ void sysctl_head_get(struct ctl_table_header *head)
spin_unlock(&sysctl_lock);
}
+static void free_head(struct rcu_head *rcu)
+{
+ kfree(container_of(rcu, struct ctl_table_header, rcu));
+}
+
void sysctl_head_put(struct ctl_table_header *head)
{
spin_lock(&sysctl_lock);
if (!--head->count)
- kfree(head);
+ call_rcu(&head->rcu, free_head);
spin_unlock(&sysctl_lock);
}
@@ -1685,13 +1683,8 @@ static int test_perm(int mode, int op)
int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
{
- int error;
int mode;
- error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
- if (error)
- return error;
-
if (root->permissions)
mode = root->permissions(root, current->nsproxy, table);
else
@@ -1948,10 +1941,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
start_unregistering(header);
if (!--header->parent->count) {
WARN_ON(1);
- kfree(header->parent);
+ call_rcu(&header->parent->rcu, free_head);
}
if (!--header->count)
- kfree(header);
+ call_rcu(&header->rcu, free_head);
spin_unlock(&sysctl_lock);
}
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
{
const struct bin_table *table = NULL;
- struct nameidata nd;
struct vfsmount *mnt;
struct file *file;
ssize_t result;
char *pathname;
int flags;
- int acc_mode;
pathname = sysctl_getname(name, nlen, &table);
result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
/* How should the sysctl be accessed? */
if (oldval && oldlen && newval && newlen) {
flags = O_RDWR;
- acc_mode = MAY_READ | MAY_WRITE;
} else if (newval && newlen) {
flags = O_WRONLY;
- acc_mode = MAY_WRITE;
} else if (oldval && oldlen) {
flags = O_RDONLY;
- acc_mode = MAY_READ;
} else {
result = 0;
goto out_putname;
}
mnt = current->nsproxy->pid_ns->proc_mnt;
- result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
- if (result)
- goto out_putname;
-
- result = may_open(&nd.path, acc_mode, flags);
- if (result)
- goto out_putpath;
-
- file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
+ file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
result = PTR_ERR(file);
if (IS_ERR(file))
goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
putname(pathname);
out:
return result;
-
-out_putpath:
- path_put(&nd.path);
- goto out_putname;
}
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..8e8dc6d705c9 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -150,7 +150,7 @@ static inline void warp_clock(void)
* various programs will get confused when the clock gets warped.
*/
-int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
+int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
{
static int firsttime = 1;
int error = 0;
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
}
/**
- * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
* @n: nsecs in u64
*
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
-unsigned long nsecs_to_jiffies(u64 n)
+u64 nsecs_to_jiffies64(u64 n)
{
#if (NSEC_PER_SEC % HZ) == 0
/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,22 +674,23 @@ unsigned long nsecs_to_jiffies(u64 n)
#endif
}
-#if (BITS_PER_LONG < 64)
-u64 get_jiffies_64(void)
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n: nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
{
- unsigned long seq;
- u64 ret;
-
- do {
- seq = read_seqbegin(&xtime_lock);
- ret = jiffies_64;
- } while (read_seqretry(&xtime_lock, seq));
- return ret;
+ return (unsigned long)nsecs_to_jiffies64(n);
}
-EXPORT_SYMBOL(get_jiffies_64);
-#endif
-
-EXPORT_SYMBOL(jiffies);
/*
* Add two timespec values and do a safety check for overflow.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06c..b0425991e9ac 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,5 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timeconv.o posix-clock.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d7395fdfb9f3..0d74b9ba90c8 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,7 +18,6 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 5404a8456909..b2fa506667c0 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -22,8 +22,11 @@
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
+#include <linux/module.h>
#include <linux/init.h>
+#include "tick-internal.h"
+
/* The Jiffies based clocksource is the lowest common
* denominator clock source which should function on
* all systems. It has the same coarse resolution as
@@ -64,6 +67,23 @@ struct clocksource clocksource_jiffies = {
.shift = JIFFIES_SHIFT,
};
+#if (BITS_PER_LONG < 64)
+u64 get_jiffies_64(void)
+{
+ unsigned long seq;
+ u64 ret;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ ret = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+ return ret;
+}
+EXPORT_SYMBOL(get_jiffies_64);
+#endif
+
+EXPORT_SYMBOL(jiffies);
+
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5c00242fa921..5f1bb8e2008f 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,6 +16,8 @@
#include <linux/mm.h>
#include <linux/module.h>
+#include "tick-internal.h"
+
/*
* NTP timekeeping variables:
*/
@@ -646,6 +648,17 @@ int do_adjtimex(struct timex *txc)
hrtimer_cancel(&leap_timer);
}
+ if (txc->modes & ADJ_SETOFFSET) {
+ struct timespec delta;
+ delta.tv_sec = txc->time.tv_sec;
+ delta.tv_nsec = txc->time.tv_usec;
+ if (!(txc->modes & ADJ_NANO))
+ delta.tv_nsec *= 1000;
+ result = timekeeping_inject_offset(&delta);
+ if (result)
+ return result;
+ }
+
getnstimeofday(&ts);
write_seqlock_irq(&xtime_lock);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
new file mode 100644
index 000000000000..25028dd4fa18
--- /dev/null
+++ b/kernel/time/posix-clock.c
@@ -0,0 +1,451 @@
+/*
+ * posix-clock.c - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/posix-clock.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+static void delete_clock(struct kref *kref);
+
+/*
+ * Returns fp's clock with clk->mutex held, or NULL (mutex dropped) if it is a zombie.
+ */
+static struct posix_clock *get_posix_clock(struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+
+ mutex_lock(&clk->mutex);
+
+ if (!clk->zombie)
+ return clk;
+
+ mutex_unlock(&clk->mutex);
+
+ return NULL;
+}
+
+static void put_posix_clock(struct posix_clock *clk)
+{
+ mutex_unlock(&clk->mutex);
+}
+
+static ssize_t posix_clock_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -EINVAL;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.read)
+ err = clk->ops.read(clk, fp->f_flags, buf, count);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ unsigned int result = 0;
+
+ if (!clk)
+ return POLLERR; /* poll yields an event mask, not a negative errno */
+
+ if (clk->ops.poll)
+ result = clk->ops.poll(clk, fp, wait);
+
+ put_posix_clock(clk);
+
+ return result;
+}
+
+static int posix_clock_fasync(int fd, struct file *fp, int on)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = 0;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.fasync)
+ err = clk->ops.fasync(clk, fd, fp, on);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENODEV;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.mmap)
+ err = clk->ops.mmap(clk, vma);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static long posix_clock_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long posix_clock_compat_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+#endif
+
+static int posix_clock_open(struct inode *inode, struct file *fp)
+{
+ int err;
+ struct posix_clock *clk =
+ container_of(inode->i_cdev, struct posix_clock, cdev);
+
+ mutex_lock(&clk->mutex);
+
+ if (clk->zombie) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (clk->ops.open)
+ err = clk->ops.open(clk, fp->f_mode);
+ else
+ err = 0;
+
+ if (!err) {
+ kref_get(&clk->kref);
+ fp->private_data = clk;
+ }
+out:
+ mutex_unlock(&clk->mutex);
+ return err;
+}
+
+static int posix_clock_release(struct inode *inode, struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+ int err = 0;
+
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+ kref_put(&clk->kref, delete_clock);
+
+ fp->private_data = NULL;
+
+ return err;
+}
+
+static const struct file_operations posix_clock_file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = posix_clock_read,
+ .poll = posix_clock_poll,
+ .unlocked_ioctl = posix_clock_ioctl,
+ .open = posix_clock_open,
+ .release = posix_clock_release,
+ .fasync = posix_clock_fasync,
+ .mmap = posix_clock_mmap,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = posix_clock_compat_ioctl,
+#endif
+};
+
+int posix_clock_register(struct posix_clock *clk, dev_t devid)
+{
+ int err;
+
+ kref_init(&clk->kref);
+ mutex_init(&clk->mutex);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
+ clk->cdev.owner = clk->ops.owner;
+ err = cdev_add(&clk->cdev, devid, 1);
+ if (err)
+ goto no_cdev;
+
+ return err;
+no_cdev:
+ mutex_destroy(&clk->mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(posix_clock_register);
+
+static void delete_clock(struct kref *kref)
+{
+ struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+ mutex_destroy(&clk->mutex);
+ if (clk->release)
+ clk->release(clk);
+}
+
+void posix_clock_unregister(struct posix_clock *clk)
+{
+ cdev_del(&clk->cdev);
+
+ mutex_lock(&clk->mutex);
+ clk->zombie = true;
+ mutex_unlock(&clk->mutex);
+
+ kref_put(&clk->kref, delete_clock);
+}
+EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
+struct posix_clock_desc {
+ struct file *fp;
+ struct posix_clock *clk;
+};
+
+static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
+{
+ struct file *fp = fget(CLOCKID_TO_FD(id));
+ int err = -EINVAL;
+
+ if (!fp)
+ return err;
+
+ if (fp->f_op->open != posix_clock_open || !fp->private_data)
+ goto out;
+
+ cd->fp = fp;
+ cd->clk = get_posix_clock(fp); /* NULL when the clock is a zombie */
+
+ err = cd->clk ? 0 : -ENODEV;
+out:
+ if (err)
+ fput(fp); /* only on failure; success keeps the file reference in cd->fp */
+ return err;
+}
+
+static void put_clock_desc(struct posix_clock_desc *cd)
+{
+ put_posix_clock(cd->clk);
+ fput(cd->fp);
+}
+
+static int pc_clock_adjtime(clockid_t id, struct timex *tx)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (cd.clk->ops.clock_adjtime)
+ err = cd.clk->ops.clock_adjtime(cd.clk, tx);
+ else
+ err = -EOPNOTSUPP;
+out:
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_gettime)
+ err = cd.clk->ops.clock_gettime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_getres(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_getres)
+ err = cd.clk->ops.clock_getres(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_settime(clockid_t id, const struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if ((cd.fp->f_mode & FMODE_WRITE) == 0) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (cd.clk->ops.clock_settime)
+ err = cd.clk->ops.clock_settime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+out:
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_create(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_create)
+ err = cd.clk->ops.timer_create(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_delete(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_delete)
+ err = cd.clk->ops.timer_delete(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+
+ if (get_clock_desc(id, &cd))
+ return;
+
+ if (cd.clk->ops.timer_gettime)
+ cd.clk->ops.timer_gettime(cd.clk, kit, ts);
+
+ put_clock_desc(&cd);
+}
+
+static int pc_timer_settime(struct k_itimer *kit, int flags,
+ struct itimerspec *ts, struct itimerspec *old)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_settime)
+ err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+struct k_clock clock_posix_dynamic = {
+ .clock_getres = pc_clock_getres,
+ .clock_set = pc_clock_settime,
+ .clock_get = pc_clock_gettime,
+ .clock_adj = pc_clock_adjtime,
+ .timer_create = pc_timer_create,
+ .timer_set = pc_timer_settime,
+ .timer_del = pc_timer_delete,
+ .timer_get = pc_timer_gettime,
+};
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 48b2761b5668..da800ffa810c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
@@ -600,4 +599,14 @@ int tick_broadcast_oneshot_active(void)
return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}
+/*
+ * Check whether the broadcast device supports oneshot.
+ */
+bool tick_broadcast_oneshot_available(void)
+{
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+ return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
+}
+
#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 051bc80a0c43..119528de8235 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <asm/irq_regs.h>
@@ -51,7 +50,11 @@ int tick_is_oneshot_available(void)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
- return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
+ if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+ return 0;
+ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
+ return 1;
+ return tick_broadcast_oneshot_available();
}
/*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 290eefbc1f60..1009b06d6f89 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
/*
* tick internal variable and functions used by low/high res code
*/
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
#define TICK_DO_TIMER_BOOT -2
@@ -36,6 +40,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
extern int tick_broadcast_oneshot_active(void);
extern void tick_check_oneshot_broadcast(int cpu);
+bool tick_broadcast_oneshot_available(void);
# else /* BROADCAST */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
@@ -46,6 +51,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline void tick_check_oneshot_broadcast(int cpu) { }
+static inline bool tick_broadcast_oneshot_available(void) { return true; }
# endif /* !BROADCAST */
#else /* !ONESHOT */
@@ -76,6 +82,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
return 0;
}
static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline bool tick_broadcast_oneshot_available(void) { return false; }
#endif /* !TICK_ONESHOT */
/*
@@ -132,3 +139,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
+
+#endif
+
+extern void do_timer(unsigned long ticks);
+extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 5cbc101f908b..2d04411a5f05 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea2433471..d5097c44b407 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,7 +19,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <linux/module.h>
#include <asm/irq_regs.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d27c7562902c..3bd7e3d5c632 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday);
*
* Sets the time of day to the new time and update NTP and notify hrtimers
*/
-int do_settimeofday(struct timespec *tv)
+int do_settimeofday(const struct timespec *tv)
{
struct timespec ts_delta;
unsigned long flags;
@@ -387,6 +387,42 @@ int do_settimeofday(struct timespec *tv)
EXPORT_SYMBOL(do_settimeofday);
+
+/**
+ * timekeeping_inject_offset - Adds or subtracts from the current time.
+ * @ts: pointer to the timespec variable containing the offset
+ *
+ * Adds or subtracts an offset value from the current time.
+ */
+int timekeeping_inject_offset(struct timespec *ts)
+{
+ unsigned long flags;
+
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ timekeeping_forward_now();
+
+ xtime = timespec_add(xtime, *ts);
+ wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
+
+ timekeeper.ntp_error = 0;
+ ntp_clear();
+
+ update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+ timekeeper.mult);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+
+ return 0;
+}
+EXPORT_SYMBOL(timekeeping_inject_offset);
+
/**
* change_clocksource - Swaps clocksources if a new one is available
*
@@ -779,7 +815,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
*
* Called from the timer interrupt, must hold a write on xtime_lock.
*/
-void update_wall_time(void)
+static void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
@@ -871,7 +907,7 @@ void update_wall_time(void)
* getboottime - Return the real time of system boot.
* @ts: pointer to the timespec to be set
*
- * Returns the time of day in a timespec.
+ * Returns the wall-time of boot in a timespec.
*
* This is based on the wall_to_monotonic offset and the total suspend
* time. Calls to settimeofday will affect the value returned (which
@@ -889,6 +925,55 @@ void getboottime(struct timespec *ts)
}
EXPORT_SYMBOL_GPL(getboottime);
+
+/**
+ * get_monotonic_boottime - Returns monotonic time since boot
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the monotonic time since boot in a timespec.
+ *
+ * This is similar to CLOCK_MONOTONIC/ktime_get_ts, but also
+ * includes the time spent in suspend.
+ */
+void get_monotonic_boottime(struct timespec *ts)
+{
+ struct timespec tomono, sleep;
+ unsigned int seq;
+ s64 nsecs;
+
+ WARN_ON(timekeeping_suspended);
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ *ts = xtime;
+ tomono = wall_to_monotonic;
+ sleep = total_sleep_time;
+ nsecs = timekeeping_get_ns();
+
+ } while (read_seqretry(&xtime_lock, seq));
+
+ set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
+ ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
+}
+EXPORT_SYMBOL_GPL(get_monotonic_boottime);
+
+/**
+ * ktime_get_boottime - Returns monotonic time since boot in a ktime
+ *
+ * Returns the monotonic time since boot in a ktime
+ *
+ * This is similar to CLOCK_MONOTONIC/ktime_get, but also
+ * includes the time spent in suspend.
+ */
+ktime_t ktime_get_boottime(void)
+{
+ struct timespec ts;
+
+ get_monotonic_boottime(&ts);
+ return timespec_to_ktime(ts);
+}
+EXPORT_SYMBOL_GPL(ktime_get_boottime);
+
/**
* monotonic_to_bootbased - Convert the monotonic time to boot based.
* @ts: pointer to the timespec to be converted
@@ -910,11 +995,6 @@ struct timespec __current_kernel_time(void)
return xtime;
}
-struct timespec __get_wall_to_monotonic(void)
-{
- return wall_to_monotonic;
-}
-
struct timespec current_kernel_time(void)
{
struct timespec now;
@@ -946,3 +1026,48 @@ struct timespec get_monotonic_coarse(void)
now.tv_nsec + mono.tv_nsec);
return now;
}
+
+/*
+ * The 64-bit jiffies value is not atomic - you MUST NOT read it
+ * without sampling the sequence number in xtime_lock.
+ * jiffies is defined in the linker script...
+ */
+void do_timer(unsigned long ticks)
+{
+ jiffies_64 += ticks;
+ update_wall_time();
+ calc_global_load(ticks);
+}
+
+/**
+ * get_xtime_and_monotonic_and_sleep_offset() - get xtime, wall_to_monotonic,
+ * and sleep offsets.
+ * @xtim: pointer to timespec to be set with xtime
+ * @wtom: pointer to timespec to be set with wall_to_monotonic
+ * @sleep: pointer to timespec to be set with time in suspend
+ */
+void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
+ struct timespec *wtom, struct timespec *sleep)
+{
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ *xtim = xtime;
+ *wtom = wall_to_monotonic;
+ *sleep = total_sleep_time;
+ } while (read_seqretry(&xtime_lock, seq));
+}
+
+/**
+ * xtime_update() - advances the timekeeping infrastructure
+ * @ticks: number of ticks, that have elapsed since the last call.
+ *
+ * Must be called with interrupts disabled.
+ */
+void xtime_update(unsigned long ticks)
+{
+ write_seqlock(&xtime_lock);
+ do_timer(ticks);
+ write_sequnlock(&xtime_lock);
+}
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d245..fd6198692b57 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
static struct debug_obj_descr timer_debug_descr;
+static void *timer_debug_hint(void *addr)
+{
+ return ((struct timer_list *) addr)->function;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr timer_debug_descr = {
.name = "timer_list",
+ .debug_hint = timer_debug_hint,
.fixup_init = timer_fixup_init,
.fixup_activate = timer_fixup_activate,
.fixup_free = timer_fixup_free,
@@ -964,6 +970,25 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
* add_timer_on(). Upon exit the timer is not queued and the handler is
* not running on any CPU.
*
+ * Note: You must not hold locks that are held in interrupt context
+ * while calling this function. Even if the lock has nothing to do
+ * with the timer in question. Here's why:
+ *
+ * CPU0 CPU1
+ * ---- ----
+ * <SOFTIRQ>
+ * call_timer_fn();
+ * base->running_timer = mytimer;
+ * spin_lock_irq(somelock);
+ * <IRQ>
+ * spin_lock(somelock);
+ * del_timer_sync(mytimer);
+ * while (base->running_timer == mytimer);
+ *
+ * Now del_timer_sync() will never return and never release somelock.
+ * The interrupt on the other CPU is waiting to grab somelock but
+ * it has interrupted the softirq that CPU0 is waiting to finish.
+ *
* The function returns whether it has deactivated a pending timer or not.
*/
int del_timer_sync(struct timer_list *timer)
@@ -971,6 +996,10 @@ int del_timer_sync(struct timer_list *timer)
#ifdef CONFIG_LOCKDEP
unsigned long flags;
+ /*
+ * If lockdep gives a backtrace here, please reference
+ * the synchronization rules above.
+ */
local_irq_save(flags);
lock_map_acquire(&timer->lockdep_map);
lock_map_release(&timer->lockdep_map);
@@ -1295,19 +1324,6 @@ void run_local_timers(void)
raise_softirq(TIMER_SOFTIRQ);
}
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
- */
-
-void do_timer(unsigned long ticks)
-{
- jiffies_64 += ticks;
- update_wall_time();
- calc_global_load(ticks);
-}
-
#ifdef __ARCH_WANT_SYS_ALARM
/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d95721f33702..cbafed7d4f38 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
rwbs[i] = '\0';
}
-void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
-{
- int rw = rq->cmd_flags & 0x03;
- int bytes;
-
- if (rq->cmd_flags & REQ_DISCARD)
- rw |= REQ_DISCARD;
-
- if (rq->cmd_flags & REQ_SECURE)
- rw |= REQ_SECURE;
-
- bytes = blk_rq_bytes(rq);
-
- blk_fill_rwbs(rwbs, rw, bytes);
-}
-
#endif /* CONFIG_EVENT_TRACING */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3dadae83883..888b611897d3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3328,7 +3328,7 @@ static int start_graph_tracing(void)
/* The cpu_boot init_task->ret_stack will never be freed */
for_each_online_cpu(cpu) {
if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_task(idle_task(cpu));
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
}
do {
@@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void)
mutex_unlock(&ftrace_lock);
}
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+
+static void
+graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+ /* make curr_ret_stack visible before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
+
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ graph_init_task(t, ret_stack);
+ }
+}
+
/* Allocate a return stack for newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
@@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t)
GFP_KERNEL);
if (!ret_stack)
return;
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visable before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
+ graph_init_task(t, ret_stack);
}
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bd1c35a4fbcc..db7b439d23ee 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
*/
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
-#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
@@ -1429,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
+void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
+{
+ mutex_lock(&buffer->mutex);
+ if (val)
+ buffer->flags |= RB_FL_OVERWRITE;
+ else
+ buffer->flags &= ~RB_FL_OVERWRITE;
+ mutex_unlock(&buffer->mutex);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
+
static inline void *
__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
{
@@ -2162,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer,
if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
+ int local_clock_stable = 1;
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+ local_clock_stable = sched_clock_stable;
+#endif
WARN_ONCE(delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)delta,
(unsigned long long)ts,
- (unsigned long long)cpu_buffer->write_stamp);
+ (unsigned long long)cpu_buffer->write_stamp,
+ local_clock_stable ? "" :
+ "If you just came from a suspend/resume,\n"
+ "please switch to the trace global clock:\n"
+ " echo global > /sys/kernel/debug/tracing/trace_clock\n");
add_timestamp = 1;
}
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc53ecb80589..9541c27c1cf2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,8 +41,6 @@
#include "trace.h"
#include "trace_output.h"
-#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
-
/*
* On boot up, the ring buffer is set to the minimum size, so that
* we do not waste memory on systems that are not using tracing.
@@ -340,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
- TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
+ TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +423,7 @@ static const char *trace_options[] = {
"sleep-time",
"graph-time",
"record-cmd",
+ "overwrite",
NULL
};
@@ -780,6 +779,11 @@ __acquires(kernel_lock)
tracing_reset_online_cpus(tr);
current_trace = type;
+
+ /* If we expanded the buffers, make sure the max is expanded too */
+ if (ring_buffer_expanded && type->use_max_tr)
+ ring_buffer_resize(max_tr.buffer, trace_buf_size);
+
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
@@ -792,6 +796,10 @@ __acquires(kernel_lock)
/* Only reset on passing, to avoid touching corrupted buffers */
tracing_reset_online_cpus(tr);
+ /* Shrink the max buffer again */
+ if (ring_buffer_expanded && type->use_max_tr)
+ ring_buffer_resize(max_tr.buffer, 1);
+
printk(KERN_CONT "PASSED\n");
}
#endif
@@ -1102,7 +1110,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
- entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1749,10 +1756,9 @@ static void print_lat_help_header(struct seq_file *m)
seq_puts(m, "# | / _----=> need-resched \n");
seq_puts(m, "# || / _---=> hardirq/softirq \n");
seq_puts(m, "# ||| / _--=> preempt-depth \n");
- seq_puts(m, "# |||| /_--=> lock-depth \n");
- seq_puts(m, "# |||||/ delay \n");
- seq_puts(m, "# cmd pid |||||| time | caller \n");
- seq_puts(m, "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# |||| / delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_func_help_header(struct seq_file *m)
@@ -2529,6 +2535,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
+
+ if (mask == TRACE_ITER_OVERWRITE)
+ ring_buffer_change_overwrite(global_trace.buffer, enabled);
}
static ssize_t
@@ -2710,6 +2719,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
mutex_lock(&trace_types_lock);
if (tracer_enabled ^ val) {
+
+ /* Only need to warn if this is used to change the state */
+ WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
+
if (val) {
tracer_enabled = 1;
if (current_trace->start)
@@ -4551,9 +4564,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
__init static int tracer_alloc_buffers(void)
{
int ring_buf_size;
+ enum ring_buffer_flags rb_flags;
int i;
int ret = -ENOMEM;
+
if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
goto out;
@@ -4566,12 +4581,13 @@ __init static int tracer_alloc_buffers(void)
else
ring_buf_size = 1;
+ rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
cpumask_copy(tracing_cpumask, cpu_all_mask);
/* TODO: make the number of buffers hot pluggable with CPUS */
- global_trace.buffer = ring_buffer_alloc(ring_buf_size,
- TRACE_BUFFER_FLAGS);
+ global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
if (!global_trace.buffer) {
printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
WARN_ON(1);
@@ -4581,7 +4597,7 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_TRACER_MAX_TRACE
- max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
+ max_tr.buffer = ring_buffer_alloc(1, rb_flags);
if (!max_tr.buffer) {
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9021f8c0c0c3..5e9dfc6286dd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,8 +272,8 @@ struct tracer {
/* If you handled the flag setting, return 0 */
int (*set_flag)(u32 old_flags, u32 bit, int set);
struct tracer *next;
- int print_max;
struct tracer_flags *flags;
+ int print_max;
int use_max_tr;
};
@@ -606,6 +606,7 @@ enum trace_iterator_flags {
TRACE_ITER_SLEEP_TIME = 0x40000,
TRACE_ITER_GRAPH_TIME = 0x80000,
TRACE_ITER_RECORD_CMD = 0x100000,
+ TRACE_ITER_OVERWRITE = 0x200000,
};
/*
@@ -661,8 +662,10 @@ struct ftrace_event_field {
};
struct event_filter {
- int n_preds;
- struct filter_pred **preds;
+ int n_preds; /* Number assigned */
+ int a_preds; /* allocated */
+ struct filter_pred *preds;
+ struct filter_pred *root;
char *filter_string;
};
@@ -674,11 +677,23 @@ struct event_subsystem {
int nr_events;
};
+#define FILTER_PRED_INVALID ((unsigned short)-1)
+#define FILTER_PRED_IS_RIGHT (1 << 15)
+#define FILTER_PRED_FOLD (1 << 15)
+
+/*
+ * The max preds is the size of unsigned short with
+ * two flags at the MSBs. One bit is used for both the IS_RIGHT
+ * and FOLD flags. The other is reserved.
+ *
+ * 2^14 preds is way more than enough.
+ */
+#define MAX_FILTER_PRED 16384
+
struct filter_pred;
struct regex;
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
- int val1, int val2);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
typedef int (*regex_match_func)(char *str, struct regex *r, int len);
@@ -700,11 +715,23 @@ struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
- char *field_name;
+ /*
+ * Leaf nodes use field_name, ops is used by AND and OR
+ * nodes. The field_name is always freed when freeing a pred.
+ * We can overload field_name for ops and have it freed
+ * as well.
+ */
+ union {
+ char *field_name;
+ unsigned short *ops;
+ };
int offset;
int not;
int op;
- int pop_n;
+ unsigned short index;
+ unsigned short parent;
+ unsigned short left;
+ unsigned short right;
};
extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 6cf223764be8..1516cb3ec549 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
*/
#define FTRACE_CTX_FIELDS \
__field( unsigned int, prev_pid ) \
+ __field( unsigned int, next_pid ) \
+ __field( unsigned int, next_cpu ) \
__field( unsigned char, prev_prio ) \
__field( unsigned char, prev_state ) \
- __field( unsigned int, next_pid ) \
__field( unsigned char, next_prio ) \
- __field( unsigned char, next_state ) \
- __field( unsigned int, next_cpu )
+ __field( unsigned char, next_state )
FTRACE_ENTRY(context_switch, ctx_switch_entry,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5f499e0438a4..e88f74fe1d4c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
- __common_field(int, lock_depth);
return ret;
}
@@ -326,6 +325,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
{
return __ftrace_set_clr_event(NULL, system, event, set);
}
+EXPORT_SYMBOL_GPL(trace_set_clr_event);
/* 128 should be much more than enough */
#define EVENT_BUF_SIZE 127
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 36d40104b17f..3249b4f77ef0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,9 +123,13 @@ struct filter_parse_state {
} operand;
};
+struct pred_stack {
+ struct filter_pred **preds;
+ int index;
+};
+
#define DEFINE_COMPARISON_PRED(type) \
-static int filter_pred_##type(struct filter_pred *pred, void *event, \
- int val1, int val2) \
+static int filter_pred_##type(struct filter_pred *pred, void *event) \
{ \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
}
#define DEFINE_EQUALITY_PRED(size) \
-static int filter_pred_##size(struct filter_pred *pred, void *event, \
- int val1, int val2) \
+static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
u##size *addr = (u##size *)(event + pred->offset); \
u##size val = (u##size)pred->val; \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
-static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
- void *event __attribute((unused)),
- int val1, int val2)
-{
- return val1 && val2;
-}
-
-static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
- void *event __attribute((unused)),
- int val1, int val2)
-{
- return val1 || val2;
-}
-
/* Filter predicate for fixed sized arrays of characters */
-static int filter_pred_string(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_string(struct filter_pred *pred, void *event)
{
char *addr = (char *)(event + pred->offset);
int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
}
/* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
{
char **addr = (char **)(event + pred->offset);
int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
* and add it to the address of the entry, and at last we have
* the address of the string.
*/
-static int filter_pred_strloc(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_strloc(struct filter_pred *pred, void *event)
{
u32 str_item = *(u32 *)(event + pred->offset);
int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
return match;
}
-static int filter_pred_none(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_none(struct filter_pred *pred, void *event)
{
return 0;
}
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
pred->not ^= not;
}
+enum move_type {
+ MOVE_DOWN,
+ MOVE_UP_FROM_LEFT,
+ MOVE_UP_FROM_RIGHT
+};
+
+static struct filter_pred *
+get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
+ int index, enum move_type *move)
+{
+ if (pred->parent & FILTER_PRED_IS_RIGHT)
+ *move = MOVE_UP_FROM_RIGHT;
+ else
+ *move = MOVE_UP_FROM_LEFT;
+ pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
+
+ return pred;
+}
+
+/*
+ * A series of AND or ORs were found together. Instead of
+ * climbing up and down the tree branches, an array of the
+ * ops was made in order of checks. We can just move across
+ * the array and short circuit if needed.
+ */
+static int process_ops(struct filter_pred *preds,
+ struct filter_pred *op, void *rec)
+{
+ struct filter_pred *pred;
+ int type;
+ int match;
+ int i;
+
+ /*
+ * Micro-optimization: We set type to true if op
+ * is an OR and false otherwise (AND). Then we
+ * just need to test if the match is equal to
+ * the type, and if it is, we can short circuit the
+ * rest of the checks:
+ *
+ * if ((match && op->op == OP_OR) ||
+ * (!match && op->op == OP_AND))
+ * return match;
+ */
+ type = op->op == OP_OR;
+
+ for (i = 0; i < op->val; i++) {
+ pred = &preds[op->ops[i]];
+ match = pred->fn(pred, rec);
+ if (!!match == type)
+ return match;
+ }
+ return match;
+}
+
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
- int match, top = 0, val1 = 0, val2 = 0;
- int stack[MAX_FILTER_PRED];
+ int match = -1;
+ enum move_type move = MOVE_DOWN;
+ struct filter_pred *preds;
struct filter_pred *pred;
- int i;
+ struct filter_pred *root;
+ int n_preds;
+ int done = 0;
+
+ /* no filter is considered a match */
+ if (!filter)
+ return 1;
+
+ n_preds = filter->n_preds;
+
+ if (!n_preds)
+ return 1;
+
+ /*
+ * n_preds, root and filter->preds are protected with preemption disabled.
+ */
+ preds = rcu_dereference_sched(filter->preds);
+ root = rcu_dereference_sched(filter->root);
+ if (!root)
+ return 1;
+
+ pred = root;
- for (i = 0; i < filter->n_preds; i++) {
- pred = filter->preds[i];
- if (!pred->pop_n) {
- match = pred->fn(pred, rec, val1, val2);
- stack[top++] = match;
+ /* match is currently meaningless */
+ match = -1;
+
+ do {
+ switch (move) {
+ case MOVE_DOWN:
+ /* only AND and OR have children */
+ if (pred->left != FILTER_PRED_INVALID) {
+ /* If ops is set, then it was folded. */
+ if (!pred->ops) {
+ /* keep going down the left side */
+ pred = &preds[pred->left];
+ continue;
+ }
+ /* We can treat folded ops as a leaf node */
+ match = process_ops(preds, pred, rec);
+ } else
+ match = pred->fn(pred, rec);
+ /* If this pred is the only pred */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ /*
+ * Check for short circuits.
+ *
+ * Optimization: !!match == (pred->op == OP_OR)
+ * is the same as:
+ * if ((match && pred->op == OP_OR) ||
+ * (!match && pred->op == OP_AND))
+ */
+ if (!!match == (pred->op == OP_OR)) {
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ /* now go down the right side of the tree. */
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ /* We finished this equation. */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
continue;
}
- if (pred->pop_n > top) {
- WARN_ON_ONCE(1);
- return 0;
- }
- val1 = stack[--top];
- val2 = stack[--top];
- match = pred->fn(pred, rec, val1, val2);
- stack[top++] = match;
- }
+ done = 1;
+ } while (!done);
- return stack[--top];
+ return match;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
static void remove_filter_string(struct event_filter *filter)
{
+ if (!filter)
+ return;
+
kfree(filter->filter_string);
filter->filter_string = NULL;
}
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
- struct event_filter *filter = call->filter;
+ struct event_filter *filter;
mutex_lock(&event_mutex);
+ filter = call->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s)
{
- struct event_filter *filter = system->filter;
+ struct event_filter *filter;
mutex_lock(&event_mutex);
+ filter = system->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
pred->regex.len = 0;
}
-static int filter_set_pred(struct filter_pred *dest,
+static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+{
+ stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+ if (!stack->preds)
+ return -ENOMEM;
+ stack->index = n_preds;
+ return 0;
+}
+
+static void __free_pred_stack(struct pred_stack *stack)
+{
+ kfree(stack->preds);
+ stack->index = 0;
+}
+
+static int __push_pred_stack(struct pred_stack *stack,
+ struct filter_pred *pred)
+{
+ int index = stack->index;
+
+ if (WARN_ON(index == 0))
+ return -ENOSPC;
+
+ stack->preds[--index] = pred;
+ stack->index = index;
+ return 0;
+}
+
+static struct filter_pred *
+__pop_pred_stack(struct pred_stack *stack)
+{
+ struct filter_pred *pred;
+ int index = stack->index;
+
+ pred = stack->preds[index++];
+ if (!pred)
+ return NULL;
+
+ stack->index = index;
+ return pred;
+}
+
+static int filter_set_pred(struct event_filter *filter,
+ int idx,
+ struct pred_stack *stack,
struct filter_pred *src,
filter_pred_fn_t fn)
{
+ struct filter_pred *dest = &filter->preds[idx];
+ struct filter_pred *left;
+ struct filter_pred *right;
+
*dest = *src;
if (src->field_name) {
dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
return -ENOMEM;
}
dest->fn = fn;
+ dest->index = idx;
- return 0;
+ if (dest->op == OP_OR || dest->op == OP_AND) {
+ right = __pop_pred_stack(stack);
+ left = __pop_pred_stack(stack);
+ if (!left || !right)
+ return -EINVAL;
+ /*
+ * If both children can be folded
+ * and they are the same op as this op or a leaf,
+ * then this op can be folded.
+ */
+ if (left->index & FILTER_PRED_FOLD &&
+ (left->op == dest->op ||
+ left->left == FILTER_PRED_INVALID) &&
+ right->index & FILTER_PRED_FOLD &&
+ (right->op == dest->op ||
+ right->left == FILTER_PRED_INVALID))
+ dest->index |= FILTER_PRED_FOLD;
+
+ dest->left = left->index & ~FILTER_PRED_FOLD;
+ dest->right = right->index & ~FILTER_PRED_FOLD;
+ left->parent = dest->index & ~FILTER_PRED_FOLD;
+ right->parent = dest->index | FILTER_PRED_IS_RIGHT;
+ } else {
+ /*
+ * Make dest->left invalid to be used as a quick
+ * way to know this is a leaf node.
+ */
+ dest->left = FILTER_PRED_INVALID;
+
+ /* All leafs allow folding the parent ops. */
+ dest->index |= FILTER_PRED_FOLD;
+ }
+
+ return __push_pred_stack(stack, dest);
}
-static void filter_disable_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
{
- struct event_filter *filter = call->filter;
int i;
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
+ if (filter->preds) {
+ for (i = 0; i < filter->a_preds; i++)
+ kfree(filter->preds[i].field_name);
+ kfree(filter->preds);
+ filter->preds = NULL;
+ }
+ filter->a_preds = 0;
filter->n_preds = 0;
-
- for (i = 0; i < MAX_FILTER_PRED; i++)
- filter->preds[i]->fn = filter_pred_none;
}
-static void __free_preds(struct event_filter *filter)
+static void filter_disable(struct ftrace_event_call *call)
{
- int i;
+ call->flags &= ~TRACE_EVENT_FL_FILTERED;
+}
+static void __free_filter(struct event_filter *filter)
+{
if (!filter)
return;
- for (i = 0; i < MAX_FILTER_PRED; i++) {
- if (filter->preds[i])
- filter_free_pred(filter->preds[i]);
- }
- kfree(filter->preds);
+ __free_preds(filter);
kfree(filter->filter_string);
kfree(filter);
}
+/*
+ * Called when destroying the ftrace_event_call.
+ * The call is being freed, so we do not need to worry about
+ * the call being currently used. This is for module code removing
+ * the tracepoints from within it.
+ */
void destroy_preds(struct ftrace_event_call *call)
{
- __free_preds(call->filter);
+ __free_filter(call->filter);
call->filter = NULL;
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
}
-static struct event_filter *__alloc_preds(void)
+static struct event_filter *__alloc_filter(void)
{
struct event_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ return filter;
+}
+
+static int __alloc_preds(struct event_filter *filter, int n_preds)
+{
struct filter_pred *pred;
int i;
- filter = kzalloc(sizeof(*filter), GFP_KERNEL);
- if (!filter)
- return ERR_PTR(-ENOMEM);
+ if (filter->preds)
+ __free_preds(filter);
- filter->n_preds = 0;
+ filter->preds =
+ kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
- filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
if (!filter->preds)
- goto oom;
+ return -ENOMEM;
- for (i = 0; i < MAX_FILTER_PRED; i++) {
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
- goto oom;
+ filter->a_preds = n_preds;
+ filter->n_preds = 0;
+
+ for (i = 0; i < n_preds; i++) {
+ pred = &filter->preds[i];
pred->fn = filter_pred_none;
- filter->preds[i] = pred;
}
- return filter;
-
-oom:
- __free_preds(filter);
- return ERR_PTR(-ENOMEM);
-}
-
-static int init_preds(struct ftrace_event_call *call)
-{
- if (call->filter)
- return 0;
-
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
- call->filter = __alloc_preds();
- if (IS_ERR(call->filter))
- return PTR_ERR(call->filter);
-
return 0;
}
-static int init_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
{
struct ftrace_event_call *call;
- int err;
list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;
- err = init_preds(call);
- if (err)
- return err;
+ filter_disable(call);
+ remove_filter_string(call->filter);
}
-
- return 0;
}
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_filters(struct event_subsystem *system)
{
struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;
-
- filter_disable_preds(call);
- remove_filter_string(call->filter);
+ __free_filter(call->filter);
+ call->filter = NULL;
}
}
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
+ struct pred_stack *stack,
filter_pred_fn_t fn)
{
int idx, err;
- if (filter->n_preds == MAX_FILTER_PRED) {
+ if (WARN_ON(filter->n_preds == filter->a_preds)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC;
}
idx = filter->n_preds;
- filter_clear_pred(filter->preds[idx]);
- err = filter_set_pred(filter->preds[idx], pred, fn);
+ filter_clear_pred(&filter->preds[idx]);
+ err = filter_set_pred(filter, idx, stack, pred, fn);
if (err)
return err;
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
+ struct pred_stack *stack,
bool dry_run)
{
struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
unsigned long long val;
int ret;
- pred->fn = filter_pred_none;
+ fn = pred->fn = filter_pred_none;
- if (pred->op == OP_AND) {
- pred->pop_n = 2;
- fn = filter_pred_and;
+ if (pred->op == OP_AND)
goto add_pred_fn;
- } else if (pred->op == OP_OR) {
- pred->pop_n = 2;
- fn = filter_pred_or;
+ else if (pred->op == OP_OR)
goto add_pred_fn;
- }
field = find_event_field(call, pred->field_name);
if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
add_pred_fn:
if (!dry_run)
- return filter_add_pred_fn(ps, call, filter, pred, fn);
+ return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
return 0;
}
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
return 0;
}
+static int count_preds(struct filter_parse_state *ps)
+{
+ struct postfix_elt *elt;
+ int n_preds = 0;
+
+ list_for_each_entry(elt, &ps->postfix, list) {
+ if (elt->op == OP_NONE)
+ continue;
+ n_preds++;
+ }
+
+ return n_preds;
+}
+
+/*
+ * The tree is walked at filtering of an event. If the tree is not correctly
+ * built, it may cause an infinite loop. Check here that the tree does
+ * indeed terminate.
+ */
+static int check_pred_tree(struct event_filter *filter,
+ struct filter_pred *root)
+{
+ struct filter_pred *preds;
+ struct filter_pred *pred;
+ enum move_type move = MOVE_DOWN;
+ int count = 0;
+ int done = 0;
+ int max;
+
+ /*
+ * The max that we can hit a node is three times.
+ * Once going down, once coming up from left, and
+ * once coming up from right. This is more than enough
+ * since leafs are only hit a single time.
+ */
+ max = 3 * filter->n_preds;
+
+ preds = filter->preds;
+ if (!preds)
+ return -EINVAL;
+ pred = root;
+
+ do {
+ if (WARN_ON(count++ > max))
+ return -EINVAL;
+
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ /* A leaf at the root is just a leaf in the tree */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ /* We are fine. */
+ return 0;
+}
+
+static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
+{
+ struct filter_pred *pred;
+ enum move_type move = MOVE_DOWN;
+ int count = 0;
+ int done = 0;
+
+ pred = root;
+
+ do {
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ /* A leaf at the root is just a leaf in the tree */
+ if (pred == root)
+ return 1;
+ count++;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ return count;
+}
+
+static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
+{
+ struct filter_pred *pred;
+ enum move_type move = MOVE_DOWN;
+ int count = 0;
+ int children;
+ int done = 0;
+
+ /* No need to keep the fold flag */
+ root->index &= ~FILTER_PRED_FOLD;
+
+ /* If the root is a leaf then do nothing */
+ if (root->left == FILTER_PRED_INVALID)
+ return 0;
+
+ /* count the children */
+ children = count_leafs(preds, &preds[root->left]);
+ children += count_leafs(preds, &preds[root->right]);
+
+ root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
+ if (!root->ops)
+ return -ENOMEM;
+
+ root->val = children;
+
+ pred = root;
+ do {
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ if (WARN_ON(count == children))
+ return -EINVAL;
+ pred->index &= ~FILTER_PRED_FOLD;
+ root->ops[count++] = pred->index;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ return 0;
+}
+
+/*
+ * To optimize the processing of the ops, if we have several "ors" or
+ * "ands" together, we can put them in an array and process them all
+ * together speeding up the filter logic.
+ */
+static int fold_pred_tree(struct event_filter *filter,
+ struct filter_pred *root)
+{
+ struct filter_pred *preds;
+ struct filter_pred *pred;
+ enum move_type move = MOVE_DOWN;
+ int done = 0;
+ int err;
+
+ preds = filter->preds;
+ if (!preds)
+ return -EINVAL;
+ pred = root;
+
+ do {
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->index & FILTER_PRED_FOLD) {
+ err = fold_pred(preds, pred);
+ if (err)
+ return err;
+ /* Folded nodes are like leafs */
+ } else if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+
+ /* A leaf at the root is just a leaf in the tree */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ return 0;
+}
+
static int replace_preds(struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_parse_state *ps,
@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
{
char *operand1 = NULL, *operand2 = NULL;
struct filter_pred *pred;
+ struct filter_pred *root;
struct postfix_elt *elt;
+ struct pred_stack stack = { }; /* init to NULL */
int err;
int n_preds = 0;
+ n_preds = count_preds(ps);
+ if (n_preds >= MAX_FILTER_PRED) {
+ parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+ return -ENOSPC;
+ }
+
err = check_preds(ps);
if (err)
return err;
+ if (!dry_run) {
+ err = __alloc_pred_stack(&stack, n_preds);
+ if (err)
+ return err;
+ err = __alloc_preds(filter, n_preds);
+ if (err)
+ goto fail;
+ }
+
+ n_preds = 0;
list_for_each_entry(elt, &ps->postfix, list) {
if (elt->op == OP_NONE) {
if (!operand1)
@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
operand2 = elt->operand;
else {
parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto fail;
}
continue;
}
- if (n_preds++ == MAX_FILTER_PRED) {
+ if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
- return -ENOSPC;
+ err = -ENOSPC;
+ goto fail;
}
if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
if (!operand1 || !operand2) {
parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
- return -EINVAL;
+ err = -EINVAL;
+ goto fail;
}
pred = create_pred(elt->op, operand1, operand2);
add_pred:
- if (!pred)
- return -ENOMEM;
- err = filter_add_pred(ps, call, filter, pred, dry_run);
+ if (!pred) {
+ err = -ENOMEM;
+ goto fail;
+ }
+ err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
filter_free_pred(pred);
if (err)
- return err;
+ goto fail;
operand1 = operand2 = NULL;
}
- return 0;
+ if (!dry_run) {
+ /* We should have one item left on the stack */
+ pred = __pop_pred_stack(&stack);
+ if (!pred)
+ return -EINVAL;
+ /* This item is where we start from in matching */
+ root = pred;
+ /* Make sure the stack is empty */
+ pred = __pop_pred_stack(&stack);
+ if (WARN_ON(pred)) {
+ err = -EINVAL;
+ filter->root = NULL;
+ goto fail;
+ }
+ err = check_pred_tree(filter, root);
+ if (err)
+ goto fail;
+
+ /* Optimize the tree */
+ err = fold_pred_tree(filter, root);
+ if (err)
+ goto fail;
+
+ /* We don't set root until we know it works */
+ barrier();
+ filter->root = root;
+ }
+
+ err = 0;
+fail:
+ __free_pred_stack(&stack);
+ return err;
}
+struct filter_list {
+ struct list_head list;
+ struct event_filter *filter;
+};
+
static int replace_system_preds(struct event_subsystem *system,
struct filter_parse_state *ps,
char *filter_string)
{
struct ftrace_event_call *call;
+ struct filter_list *filter_item;
+ struct filter_list *tmp;
+ LIST_HEAD(filter_list);
bool fail = true;
int err;
list_for_each_entry(call, &ftrace_events, list) {
- struct event_filter *filter = call->filter;
if (strcmp(call->class->system, system->name) != 0)
continue;
- /* try to see if the filter can be applied */
- err = replace_preds(call, filter, ps, filter_string, true);
+ /*
+ * Try to see if the filter can be applied
+ * (filter arg is ignored on dry_run)
+ */
+ err = replace_preds(call, NULL, ps, filter_string, true);
if (err)
+ goto fail;
+ }
+
+ list_for_each_entry(call, &ftrace_events, list) {
+ struct event_filter *filter;
+
+ if (strcmp(call->class->system, system->name) != 0)
continue;
- /* really apply the filter */
- filter_disable_preds(call);
- err = replace_preds(call, filter, ps, filter_string, false);
+ filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+ if (!filter_item)
+ goto fail_mem;
+
+ list_add_tail(&filter_item->list, &filter_list);
+
+ filter_item->filter = __alloc_filter();
+ if (!filter_item->filter)
+ goto fail_mem;
+ filter = filter_item->filter;
+
+ /* Can only fail on no memory */
+ err = replace_filter_string(filter, filter_string);
if (err)
- filter_disable_preds(call);
- else {
+ goto fail_mem;
+
+ err = replace_preds(call, filter, ps, filter_string, false);
+ if (err) {
+ filter_disable(call);
+ parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+ append_filter_err(ps, filter);
+ } else
call->flags |= TRACE_EVENT_FL_FILTERED;
- replace_filter_string(filter, filter_string);
- }
+ /*
+ * Regardless of if this returned an error, we still
+ * replace the filter for the call.
+ */
+ filter = call->filter;
+ call->filter = filter_item->filter;
+ filter_item->filter = filter;
+
fail = false;
}
- if (fail) {
- parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
- return -EINVAL;
+ if (fail)
+ goto fail;
+
+ /*
+ * The calls can still be using the old filters.
+ * Do a synchronize_sched() to ensure all calls are
+ * done with them before we free them.
+ */
+ synchronize_sched();
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
}
return 0;
+ fail:
+ /* No call succeeded */
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
+ parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+ return -EINVAL;
+ fail_mem:
+ /* If any call succeeded, we still need to sync */
+ if (!fail)
+ synchronize_sched();
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
+ return -ENOMEM;
}
int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
- int err;
struct filter_parse_state *ps;
+ struct event_filter *filter;
+ struct event_filter *tmp;
+ int err = 0;
mutex_lock(&event_mutex);
- err = init_preds(call);
- if (err)
- goto out_unlock;
-
if (!strcmp(strstrip(filter_string), "0")) {
- filter_disable_preds(call);
- remove_filter_string(call->filter);
+ filter_disable(call);
+ filter = call->filter;
+ if (!filter)
+ goto out_unlock;
+ call->filter = NULL;
+ /* Make sure the filter is not being used */
+ synchronize_sched();
+ __free_filter(filter);
goto out_unlock;
}
@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
if (!ps)
goto out_unlock;
- filter_disable_preds(call);
- replace_filter_string(call->filter, filter_string);
+ filter = __alloc_filter();
+ if (!filter) {
+ kfree(ps);
+ goto out_unlock;
+ }
+
+ replace_filter_string(filter, filter_string);
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err) {
- append_filter_err(ps, call->filter);
+ append_filter_err(ps, filter);
goto out;
}
- err = replace_preds(call, call->filter, ps, filter_string, false);
- if (err)
- append_filter_err(ps, call->filter);
- else
+ err = replace_preds(call, filter, ps, filter_string, false);
+ if (err) {
+ filter_disable(call);
+ append_filter_err(ps, filter);
+ } else
call->flags |= TRACE_EVENT_FL_FILTERED;
out:
+ /*
+ * Always swap the call filter with the new filter
+ * even if there was an error. If there was an error
+ * in the filter, we disable the filter and show the error
+ * string
+ */
+ tmp = call->filter;
+ call->filter = filter;
+ if (tmp) {
+ /* Make sure the call is done with the filter */
+ synchronize_sched();
+ __free_filter(tmp);
+ }
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
@@ -1334,18 +1880,21 @@ out_unlock:
int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
- int err;
struct filter_parse_state *ps;
+ struct event_filter *filter;
+ int err = 0;
mutex_lock(&event_mutex);
- err = init_subsystem_preds(system);
- if (err)
- goto out_unlock;
-
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system);
remove_filter_string(system->filter);
+ filter = system->filter;
+ system->filter = NULL;
+ /* Ensure all filters are no longer used */
+ synchronize_sched();
+ filter_free_subsystem_filters(system);
+ __free_filter(filter);
goto out_unlock;
}
@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
if (!ps)
goto out_unlock;
- replace_filter_string(system->filter, filter_string);
+ filter = __alloc_filter();
+ if (!filter)
+ goto out;
+
+ replace_filter_string(filter, filter_string);
+ /*
+ * No event actually uses the system filter
+ * we can free it without synchronize_sched().
+ */
+ __free_filter(system->filter);
+ system->filter = filter;
parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
struct event_filter *filter = event->filter;
event->filter = NULL;
- __free_preds(filter);
+ __free_filter(filter);
}
int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
if (event->filter)
goto out_unlock;
- filter = __alloc_preds();
- if (IS_ERR(filter)) {
+ filter = __alloc_filter();
+ if (!filter) {
err = PTR_ERR(filter);
goto out_unlock;
}
@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
err = -ENOMEM;
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
- goto free_preds;
+ goto free_filter;
parse_init(ps, filter_ops, filter_str);
err = filter_parse(ps);
@@ -1435,9 +1994,9 @@ free_ps:
postfix_clear(ps);
kfree(ps);
-free_preds:
+free_filter:
if (err)
- __free_preds(filter);
+ __free_filter(filter);
out_unlock:
mutex_unlock(&event_mutex);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2dec9bcde8b4..8435b43b1782 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
kfree(data);
}
+/* Bitfield fetch function */
+struct bitfield_fetch_param {
+ struct fetch_param orig;
+ unsigned char hi_shift;
+ unsigned char low_shift;
+};
+
+#define DEFINE_FETCH_bitfield(type) \
+static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
+ void *data, void *dest) \
+{ \
+ struct bitfield_fetch_param *bprm = data; \
+ type buf = 0; \
+ call_fetch(&bprm->orig, regs, &buf); \
+ if (buf) { \
+ buf <<= bprm->hi_shift; \
+ buf >>= bprm->low_shift; \
+ } \
+ *(type *)dest = buf; \
+}
+DEFINE_BASIC_FETCH_FUNCS(bitfield)
+#define fetch_bitfield_string NULL
+#define fetch_bitfield_string_size NULL
+
+static __kprobes void
+free_bitfield_fetch_param(struct bitfield_fetch_param *data)
+{
+ /*
+ * Don't check the bitfield itself, because this must be the
+ * last fetch function.
+ */
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ free_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ free_symbol_cache(data->orig.data);
+ kfree(data);
+}
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -367,6 +404,7 @@ enum {
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
+ FETCH_MTD_bitfield,
FETCH_MTD_END,
};
@@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
+ASSIGN_FETCH_FUNC(bitfield, ftype), \
} \
}
@@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type)
if (!type)
type = DEFAULT_FETCH_TYPE_STR;
+ /* Special case: bitfield */
+ if (*type == 'b') {
+ unsigned long bs;
+ type = strchr(type, '/');
+ if (!type)
+ goto fail;
+ type++;
+ if (strict_strtoul(type, 0, &bs))
+ goto fail;
+ switch (bs) {
+ case 8:
+ return find_fetch_type("u8");
+ case 16:
+ return find_fetch_type("u16");
+ case 32:
+ return find_fetch_type("u32");
+ case 64:
+ return find_fetch_type("u64");
+ default:
+ goto fail;
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
if (strcmp(type, fetch_type_table[i].name) == 0)
return &fetch_type_table[i];
+fail:
return NULL;
}
@@ -586,7 +649,9 @@ error:
static void free_probe_arg(struct probe_arg *arg)
{
- if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
+ if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
+ free_bitfield_fetch_param(arg->fetch.data);
+ else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
free_deref_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
free_symbol_cache(arg->fetch.data);
@@ -767,16 +832,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
}
break;
case '+': /* deref memory */
+ arg++; /* Skip '+', because strict_strtol() rejects it. */
case '-':
tmp = strchr(arg, '(');
if (!tmp)
break;
*tmp = '\0';
- ret = strict_strtol(arg + 1, 0, &offset);
+ ret = strict_strtol(arg, 0, &offset);
if (ret)
break;
- if (arg[0] == '-')
- offset = -offset;
arg = tmp + 1;
tmp = strrchr(arg, ')');
if (tmp) {
@@ -807,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
return ret;
}
+#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
+
+/* Bitfield type needs to be parsed into a fetch function */
+static int __parse_bitfield_probe_arg(const char *bf,
+ const struct fetch_type *t,
+ struct fetch_param *f)
+{
+ struct bitfield_fetch_param *bprm;
+ unsigned long bw, bo;
+ char *tail;
+
+ if (*bf != 'b')
+ return 0;
+
+ bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+ if (!bprm)
+ return -ENOMEM;
+ bprm->orig = *f;
+ f->fn = t->fetch[FETCH_MTD_bitfield];
+ f->data = (void *)bprm;
+
+ bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
+ if (bw == 0 || *tail != '@')
+ return -EINVAL;
+
+ bf = tail + 1;
+ bo = simple_strtoul(bf, &tail, 0);
+ if (tail == bf || *tail != '/')
+ return -EINVAL;
+
+ bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
+ bprm->low_shift = bprm->hi_shift + bo;
+ return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
+}
+
/* String length checking wrapper */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
struct probe_arg *parg, int is_return)
@@ -836,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
parg->offset = tp->size;
tp->size += parg->type->size;
ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
+ if (ret >= 0 && t != NULL)
+ ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
if (ret >= 0) {
parg->fetch_size.fn = get_fetch_size_function(parg->type,
parg->fetch.fn);
@@ -1130,7 +1231,7 @@ static int command_trace_probe(const char *buf)
return ret;
}
-#define WRITE_BUFSIZE 128
+#define WRITE_BUFSIZE 4096
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 02272baa2206..456be9063c2d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -529,24 +529,34 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
* @entry: The trace entry field from the ring buffer
*
* Prints the generic fields of irqs off, in hard or softirq, preempt
- * count and lock depth.
+ * count.
*/
int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
{
- int hardirq, softirq;
+ char hardsoft_irq;
+ char need_resched;
+ char irqs_off;
+ int hardirq;
+ int softirq;
int ret;
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
+ irqs_off =
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' :
+ '.';
+ need_resched =
+ (entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.';
+ hardsoft_irq =
+ (hardirq && softirq) ? 'H' :
+ hardirq ? 'h' :
+ softirq ? 's' :
+ '.';
+
if (!trace_seq_printf(s, "%c%c%c",
- (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
- 'X' : '.',
- (entry->flags & TRACE_FLAG_NEED_RESCHED) ?
- 'N' : '.',
- (hardirq && softirq) ? 'H' :
- hardirq ? 'h' : softirq ? 's' : '.'))
+ irqs_off, need_resched, hardsoft_irq))
return 0;
if (entry->preempt_count)
@@ -554,13 +564,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
else
ret = trace_seq_putc(s, '.');
- if (!ret)
- return 0;
-
- if (entry->lock_depth < 0)
- return trace_seq_putc(s, '.');
-
- return trace_seq_printf(s, "%d", entry->lock_depth);
+ return ret;
}
static int
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 8f758d070c43..7e62c0a18456 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -247,51 +247,3 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr)
ctx_trace = tr;
}
-static void stop_sched_trace(struct trace_array *tr)
-{
- tracing_stop_sched_switch_record();
-}
-
-static int sched_switch_trace_init(struct trace_array *tr)
-{
- ctx_trace = tr;
- tracing_reset_online_cpus(tr);
- tracing_start_sched_switch_record();
- return 0;
-}
-
-static void sched_switch_trace_reset(struct trace_array *tr)
-{
- if (sched_ref)
- stop_sched_trace(tr);
-}
-
-static void sched_switch_trace_start(struct trace_array *tr)
-{
- sched_stopped = 0;
-}
-
-static void sched_switch_trace_stop(struct trace_array *tr)
-{
- sched_stopped = 1;
-}
-
-static struct tracer sched_switch_trace __read_mostly =
-{
- .name = "sched_switch",
- .init = sched_switch_trace_init,
- .reset = sched_switch_trace_reset,
- .start = sched_switch_trace_start,
- .stop = sched_switch_trace_stop,
- .wait_pipe = poll_wait_pipe,
-#ifdef CONFIG_FTRACE_SELFTEST
- .selftest = trace_selftest_startup_sched_switch,
-#endif
-};
-
-__init static int init_sched_switch_trace(void)
-{
- return register_tracer(&sched_switch_trace);
-}
-device_initcall(init_sched_switch_trace);
-
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5c9fe08d2093..ee7b5a0bb9f8 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -60,6 +60,19 @@ extern struct syscall_metadata *__stop_syscalls_metadata[];
static struct syscall_metadata **syscalls_metadata;
+#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
+{
+ /*
+ * Only compare after the "sys" prefix. Archs that use
+ * syscall wrappers may have syscalls symbols aliases prefixed
+ * with "SyS" instead of "sys", leading to an unwanted
+ * mismatch.
+ */
+ return !strcmp(sym + 3, name + 3);
+}
+#endif
+
static __init struct syscall_metadata *
find_syscall_meta(unsigned long syscall)
{
@@ -72,14 +85,11 @@ find_syscall_meta(unsigned long syscall)
stop = __stop_syscalls_metadata;
kallsyms_lookup(syscall, NULL, NULL, NULL, str);
+ if (arch_syscall_match_sym_name(str, "sys_ni_syscall"))
+ return NULL;
+
for ( ; start < stop; start++) {
- /*
- * Only compare after the "sys" prefix. Archs that use
- * syscall wrappers may have syscalls symbols aliases prefixed
- * with "SyS" instead of "sys", leading to an unwanted
- * mismatch.
- */
- if ((*start)->name && !strcmp((*start)->name + 3, str + 3))
+ if ((*start)->name && arch_syscall_match_sym_name(str, (*start)->name))
return *start;
}
return NULL;
@@ -359,7 +369,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call)
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (num < 0 || num >= NR_syscalls)
+ if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_enter)
@@ -377,7 +387,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call)
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (num < 0 || num >= NR_syscalls)
+ if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_enter--;
@@ -393,7 +403,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call)
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (num < 0 || num >= NR_syscalls)
+ if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return -ENOSYS;
mutex_lock(&syscall_trace_lock);
if (!sys_refcount_exit)
@@ -411,7 +421,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
int num;
num = ((struct syscall_metadata *)call->data)->syscall_nr;
- if (num < 0 || num >= NR_syscalls)
+ if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
return;
mutex_lock(&syscall_trace_lock);
sys_refcount_exit--;
@@ -424,6 +434,14 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call)
int init_syscall_trace(struct ftrace_event_call *call)
{
int id;
+ int num;
+
+ num = ((struct syscall_metadata *)call->data)->syscall_nr;
+ if (num < 0 || num >= NR_syscalls) {
+ pr_debug("syscall %s metadata not mapped, disabling ftrace event\n",
+ ((struct syscall_metadata *)call->data)->name);
+ return -ENOSYS;
+ }
if (set_syscall_print_fmt(call) < 0)
return -ENOMEM;
@@ -438,7 +456,7 @@ int init_syscall_trace(struct ftrace_event_call *call)
return id;
}
-unsigned long __init arch_syscall_addr(int nr)
+unsigned long __init __weak arch_syscall_addr(int nr)
{
return (unsigned long)sys_call_table[nr];
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 11869faa6819..5ca7ce9ce754 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -79,7 +79,9 @@ enum {
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
- MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */
+ MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
+ /* call for help after 10ms
+ (min two ticks) */
MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
CREATE_COOLDOWN = HZ, /* time to breath after fail */
TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */
@@ -249,10 +251,12 @@ struct workqueue_struct *system_wq __read_mostly;
struct workqueue_struct *system_long_wq __read_mostly;
struct workqueue_struct *system_nrt_wq __read_mostly;
struct workqueue_struct *system_unbound_wq __read_mostly;
+struct workqueue_struct *system_freezable_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_wq);
EXPORT_SYMBOL_GPL(system_long_wq);
EXPORT_SYMBOL_GPL(system_nrt_wq);
EXPORT_SYMBOL_GPL(system_unbound_wq);
+EXPORT_SYMBOL_GPL(system_freezable_wq);
#define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h>
@@ -314,6 +318,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
static struct debug_obj_descr work_debug_descr;
+static void *work_debug_hint(void *addr)
+{
+ return ((struct work_struct *) addr)->func;
+}
+
/*
* fixup_init is called when:
* - an active object is initialized
@@ -385,6 +394,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state)
static struct debug_obj_descr work_debug_descr = {
.name = "work_struct",
+ .debug_hint = work_debug_hint,
.fixup_init = work_fixup_init,
.fixup_activate = work_fixup_activate,
.fixup_free = work_fixup_free,
@@ -2047,6 +2057,15 @@ repeat:
move_linked_works(work, scheduled, &n);
process_scheduled_works(rescuer);
+
+ /*
+ * Leave this gcwq. If keep_working() is %true, notify a
+ * regular worker; otherwise, we end up with 0 concurrency
+ * and stalling the execution.
+ */
+ if (keep_working(gcwq))
+ wake_up_worker(gcwq);
+
spin_unlock_irq(&gcwq->lock);
}
@@ -2956,7 +2975,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name,
*/
spin_lock(&workqueue_lock);
- if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
+ if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
for_each_cwq_cpu(cpu, wq)
get_cwq(cpu, wq)->max_active = 0;
@@ -3068,7 +3087,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
spin_lock_irq(&gcwq->lock);
- if (!(wq->flags & WQ_FREEZEABLE) ||
+ if (!(wq->flags & WQ_FREEZABLE) ||
!(gcwq->flags & GCWQ_FREEZING))
get_cwq(gcwq->cpu, wq)->max_active = max_active;
@@ -3318,7 +3337,7 @@ static int __cpuinit trustee_thread(void *__gcwq)
* want to get it over with ASAP - spam rescuers, wake up as
* many idlers as necessary and create new ones till the
* worklist is empty. Note that if the gcwq is frozen, there
- * may be frozen works in freezeable cwqs. Don't declare
+ * may be frozen works in freezable cwqs. Don't declare
* completion while frozen.
*/
while (gcwq->nr_workers != gcwq->nr_idle ||
@@ -3576,9 +3595,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
/**
* freeze_workqueues_begin - begin freezing workqueues
*
- * Start freezing workqueues. After this function returns, all
- * freezeable workqueues will queue new works to their frozen_works
- * list instead of gcwq->worklist.
+ * Start freezing workqueues. After this function returns, all freezable
+ * workqueues will queue new works to their frozen_works list instead of
+ * gcwq->worklist.
*
* CONTEXT:
* Grabs and releases workqueue_lock and gcwq->lock's.
@@ -3604,7 +3623,7 @@ void freeze_workqueues_begin(void)
list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
- if (cwq && wq->flags & WQ_FREEZEABLE)
+ if (cwq && wq->flags & WQ_FREEZABLE)
cwq->max_active = 0;
}
@@ -3615,7 +3634,7 @@ void freeze_workqueues_begin(void)
}
/**
- * freeze_workqueues_busy - are freezeable workqueues still busy?
+ * freeze_workqueues_busy - are freezable workqueues still busy?
*
* Check whether freezing is complete. This function must be called
* between freeze_workqueues_begin() and thaw_workqueues().
@@ -3624,8 +3643,8 @@ void freeze_workqueues_begin(void)
* Grabs and releases workqueue_lock.
*
* RETURNS:
- * %true if some freezeable workqueues are still busy. %false if
- * freezing is complete.
+ * %true if some freezable workqueues are still busy. %false if freezing
+ * is complete.
*/
bool freeze_workqueues_busy(void)
{
@@ -3645,7 +3664,7 @@ bool freeze_workqueues_busy(void)
list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
- if (!cwq || !(wq->flags & WQ_FREEZEABLE))
+ if (!cwq || !(wq->flags & WQ_FREEZABLE))
continue;
BUG_ON(cwq->nr_active < 0);
@@ -3690,7 +3709,7 @@ void thaw_workqueues(void)
list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
- if (!cwq || !(wq->flags & WQ_FREEZEABLE))
+ if (!cwq || !(wq->flags & WQ_FREEZABLE))
continue;
/* restore max_active and repopulate worklist */
@@ -3764,8 +3783,10 @@ static int __init init_workqueues(void)
system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
+ system_freezable_wq = alloc_workqueue("events_freezable",
+ WQ_FREEZABLE, 0);
BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
- !system_unbound_wq);
+ !system_unbound_wq || !system_freezable_wq);
return 0;
}
early_initcall(init_workqueues);
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index deebcc57d4e6..9d86e45086f5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr)
static void debug_print_object(struct debug_obj *obj, char *msg)
{
+ struct debug_obj_descr *descr = obj->descr;
static int limit;
- if (limit < 5 && obj->descr != descr_test) {
+ if (limit < 5 && descr != descr_test) {
+ void *hint = descr->debug_hint ?
+ descr->debug_hint(obj->object) : NULL;
limit++;
WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
- "object type: %s\n",
+ "object type: %s hint: %pS\n",
msg, obj_states[obj->state], obj->astate,
- obj->descr->name);
+ descr->name, hint);
}
debug_objects_warnings++;
}
diff --git a/lib/list_debug.c b/lib/list_debug.c
index 344c710d16ca..b8029a5583ff 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -35,6 +35,31 @@ void __list_add(struct list_head *new,
}
EXPORT_SYMBOL(__list_add);
+void __list_del_entry(struct list_head *entry)
+{
+ struct list_head *prev, *next;
+
+ prev = entry->prev;
+ next = entry->next;
+
+ if (WARN(next == LIST_POISON1,
+ "list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+ entry, LIST_POISON1) ||
+ WARN(prev == LIST_POISON2,
+ "list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+ entry, LIST_POISON2) ||
+ WARN(prev->next != entry,
+ "list_del corruption. prev->next should be %p, "
+ "but was %p\n", entry, prev->next) ||
+ WARN(next->prev != entry,
+ "list_del corruption. next->prev should be %p, "
+ "but was %p\n", entry, next->prev))
+ return;
+
+ __list_del(prev, next);
+}
+EXPORT_SYMBOL(__list_del_entry);
+
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
@@ -43,19 +68,7 @@ EXPORT_SYMBOL(__list_add);
*/
void list_del(struct list_head *entry)
{
- WARN(entry->next == LIST_POISON1,
- "list_del corruption, next is LIST_POISON1 (%p)\n",
- LIST_POISON1);
- WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
- "list_del corruption, prev is LIST_POISON2 (%p)\n",
- LIST_POISON2);
- WARN(entry->prev->next != entry,
- "list_del corruption. prev->next should be %p, "
- "but was %p\n", entry, entry->prev->next);
- WARN(entry->next->prev != entry,
- "list_del corruption. next->prev should be %p, "
- "but was %p\n", entry, entry->next->prev);
- __list_del(entry->prev, entry->next);
+ __list_del_entry(entry);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 5021cbc34411..ac09f2226dc7 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -148,7 +148,7 @@ nla_policy_len(const struct nla_policy *p, int n)
{
int i, len = 0;
- for (i = 0; i < n; i++) {
+ for (i = 0; i < n; i++, p++) {
if (p->len)
len += nla_total_size(p->len);
else if (nla_attr_minlen[p->type])
diff --git a/lib/plist.c b/lib/plist.c
index 1471988d9190..0ae7e6431726 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -28,6 +28,8 @@
#ifdef CONFIG_DEBUG_PI_LIST
+static struct plist_head test_head;
+
static void plist_check_prev_next(struct list_head *t, struct list_head *p,
struct list_head *n)
{
@@ -54,12 +56,13 @@ static void plist_check_list(struct list_head *top)
static void plist_check_head(struct plist_head *head)
{
- WARN_ON(!head->rawlock && !head->spinlock);
+ WARN_ON(head != &test_head && !head->rawlock && !head->spinlock);
if (head->rawlock)
WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
if (head->spinlock)
WARN_ON_SMP(!spin_is_locked(head->spinlock));
- plist_check_list(&head->prio_list);
+ if (!plist_head_empty(head))
+ plist_check_list(&plist_first(head)->prio_list);
plist_check_list(&head->node_list);
}
@@ -75,25 +78,33 @@ static void plist_check_head(struct plist_head *head)
*/
void plist_add(struct plist_node *node, struct plist_head *head)
{
- struct plist_node *iter;
+ struct plist_node *first, *iter, *prev = NULL;
+ struct list_head *node_next = &head->node_list;
plist_check_head(head);
WARN_ON(!plist_node_empty(node));
+ WARN_ON(!list_empty(&node->prio_list));
+
+ if (plist_head_empty(head))
+ goto ins_node;
- list_for_each_entry(iter, &head->prio_list, plist.prio_list) {
- if (node->prio < iter->prio)
- goto lt_prio;
- else if (node->prio == iter->prio) {
- iter = list_entry(iter->plist.prio_list.next,
- struct plist_node, plist.prio_list);
- goto eq_prio;
+ first = iter = plist_first(head);
+
+ do {
+ if (node->prio < iter->prio) {
+ node_next = &iter->node_list;
+ break;
}
- }
-lt_prio:
- list_add_tail(&node->plist.prio_list, &iter->plist.prio_list);
-eq_prio:
- list_add_tail(&node->plist.node_list, &iter->plist.node_list);
+ prev = iter;
+ iter = list_entry(iter->prio_list.next,
+ struct plist_node, prio_list);
+ } while (iter != first);
+
+ if (!prev || prev->prio != node->prio)
+ list_add_tail(&node->prio_list, &iter->prio_list);
+ins_node:
+ list_add_tail(&node->node_list, node_next);
plist_check_head(head);
}
@@ -108,14 +119,98 @@ void plist_del(struct plist_node *node, struct plist_head *head)
{
plist_check_head(head);
- if (!list_empty(&node->plist.prio_list)) {
- struct plist_node *next = plist_first(&node->plist);
+ if (!list_empty(&node->prio_list)) {
+ if (node->node_list.next != &head->node_list) {
+ struct plist_node *next;
+
+ next = list_entry(node->node_list.next,
+ struct plist_node, node_list);
- list_move_tail(&next->plist.prio_list, &node->plist.prio_list);
- list_del_init(&node->plist.prio_list);
+ /* add the next plist_node into prio_list */
+ if (list_empty(&next->prio_list))
+ list_add(&next->prio_list, &node->prio_list);
+ }
+ list_del_init(&node->prio_list);
}
- list_del_init(&node->plist.node_list);
+ list_del_init(&node->node_list);
plist_check_head(head);
}
+
+#ifdef CONFIG_DEBUG_PI_LIST
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+static struct plist_node __initdata test_node[241];
+
+static void __init plist_test_check(int nr_expect)
+{
+ struct plist_node *first, *prio_pos, *node_pos;
+
+ if (plist_head_empty(&test_head)) {
+ BUG_ON(nr_expect != 0);
+ return;
+ }
+
+ prio_pos = first = plist_first(&test_head);
+ plist_for_each(node_pos, &test_head) {
+ if (nr_expect-- < 0)
+ break;
+ if (node_pos == first)
+ continue;
+ if (node_pos->prio == prio_pos->prio) {
+ BUG_ON(!list_empty(&node_pos->prio_list));
+ continue;
+ }
+
+ BUG_ON(prio_pos->prio > node_pos->prio);
+ BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
+ prio_pos = node_pos;
+ }
+
+ BUG_ON(nr_expect != 0);
+ BUG_ON(prio_pos->prio_list.next != &first->prio_list);
+}
+
+static int __init plist_test(void)
+{
+ int nr_expect = 0, i, loop;
+ unsigned int r = local_clock();
+
+ printk(KERN_INFO "start plist test\n");
+ plist_head_init(&test_head, NULL);
+ for (i = 0; i < ARRAY_SIZE(test_node); i++)
+ plist_node_init(test_node + i, 0);
+
+ for (loop = 0; loop < 1000; loop++) {
+ r = r * 193939 % 47629;
+ i = r % ARRAY_SIZE(test_node);
+ if (plist_node_empty(test_node + i)) {
+ r = r * 193939 % 47629;
+ test_node[i].prio = r % 99;
+ plist_add(test_node + i, &test_head);
+ nr_expect++;
+ } else {
+ plist_del(test_node + i, &test_head);
+ nr_expect--;
+ }
+ plist_test_check(nr_expect);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_node); i++) {
+ if (plist_node_empty(test_node + i))
+ continue;
+ plist_del(test_node + i, &test_head);
+ nr_expect--;
+ plist_test_check(nr_expect);
+ }
+
+ printk(KERN_INFO "end plist test\n");
+ return 0;
+}
+
+module_init(plist_test);
+
+#endif
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/*
* wait for the read lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_read_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
-RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
/*
* wait for the write lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_write_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
-RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
-asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c47bbe11b804..93ca08b8a451 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -686,8 +686,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
/*
* Ensure that the address returned is DMA'ble
*/
- if (!dma_capable(dev, dev_addr, size))
- panic("map_single: bounce buffer is not DMA'ble");
+ if (!dma_capable(dev, dev_addr, size)) {
+ swiotlb_tbl_unmap_single(dev, map, size, dir);
+ dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer);
+ }
return dev_addr;
}
diff --git a/mm/Makefile b/mm/Makefile
index 2b1b575ae712..42a8326c3e3d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -7,7 +7,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
-obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
+obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
@@ -15,6 +15,12 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
$(mmu-y)
obj-y += init-mm.o
+ifdef CONFIG_NO_BOOTMEM
+ obj-y += nobootmem.o
+else
+ obj-y += bootmem.o
+endif
+
obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
obj-$(CONFIG_BOUNCE) += bounce.o
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 13b0caa9793c..07aeb89e396e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -23,6 +23,13 @@
#include "internal.h"
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data __refdata contig_page_data = {
+ .bdata = &bootmem_node_data[0]
+};
+EXPORT_SYMBOL(contig_page_data);
+#endif
+
unsigned long max_low_pfn;
unsigned long min_low_pfn;
unsigned long max_pfn;
@@ -35,7 +42,6 @@ unsigned long max_pfn;
unsigned long saved_max_pfn;
#endif
-#ifndef CONFIG_NO_BOOTMEM
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
@@ -146,7 +152,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
min_low_pfn = start;
return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
}
-#endif
+
/*
* free_bootmem_late - free bootmem pages directly to page allocator
* @addr: starting address of the range
@@ -171,53 +177,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
}
}
-#ifdef CONFIG_NO_BOOTMEM
-static void __init __free_pages_memory(unsigned long start, unsigned long end)
-{
- int i;
- unsigned long start_aligned, end_aligned;
- int order = ilog2(BITS_PER_LONG);
-
- start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
- end_aligned = end & ~(BITS_PER_LONG - 1);
-
- if (end_aligned <= start_aligned) {
- for (i = start; i < end; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-
- return;
- }
-
- for (i = start; i < start_aligned; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-
- for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
- __free_pages_bootmem(pfn_to_page(i), order);
-
- for (i = end_aligned; i < end; i++)
- __free_pages_bootmem(pfn_to_page(i), 0);
-}
-
-unsigned long __init free_all_memory_core_early(int nodeid)
-{
- int i;
- u64 start, end;
- unsigned long count = 0;
- struct range *range = NULL;
- int nr_range;
-
- nr_range = get_free_all_memory_range(&range, nodeid);
-
- for (i = 0; i < nr_range; i++) {
- start = range[i].start;
- end = range[i].end;
- count += end - start;
- __free_pages_memory(start, end);
- }
-
- return count;
-}
-#else
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
int aligned;
@@ -278,7 +237,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
return count;
}
-#endif
/**
* free_all_bootmem_node - release a node's free pages to the buddy allocator
@@ -289,12 +247,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
register_page_bootmem_info_node(pgdat);
-#ifdef CONFIG_NO_BOOTMEM
- /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
- return 0;
-#else
return free_all_bootmem_core(pgdat->bdata);
-#endif
}
/**
@@ -304,16 +257,6 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
*/
unsigned long __init free_all_bootmem(void)
{
-#ifdef CONFIG_NO_BOOTMEM
- /*
- * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
- * because in some case like Node0 doesnt have RAM installed
- * low ram will be on Node1
- * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
- * will be used instead of only Node0 related
- */
- return free_all_memory_core_early(MAX_NUMNODES);
-#else
unsigned long total_pages = 0;
bootmem_data_t *bdata;
@@ -321,10 +264,8 @@ unsigned long __init free_all_bootmem(void)
total_pages += free_all_bootmem_core(bdata);
return total_pages;
-#endif
}
-#ifndef CONFIG_NO_BOOTMEM
static void __init __free(bootmem_data_t *bdata,
unsigned long sidx, unsigned long eidx)
{
@@ -419,7 +360,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
}
BUG();
}
-#endif
/**
* free_bootmem_node - mark a page range as usable
@@ -434,10 +374,6 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size)
{
-#ifdef CONFIG_NO_BOOTMEM
- kmemleak_free_part(__va(physaddr), size);
- memblock_x86_free_range(physaddr, physaddr + size);
-#else
unsigned long start, end;
kmemleak_free_part(__va(physaddr), size);
@@ -446,7 +382,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
end = PFN_DOWN(physaddr + size);
mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
-#endif
}
/**
@@ -460,10 +395,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
*/
void __init free_bootmem(unsigned long addr, unsigned long size)
{
-#ifdef CONFIG_NO_BOOTMEM
- kmemleak_free_part(__va(addr), size);
- memblock_x86_free_range(addr, addr + size);
-#else
unsigned long start, end;
kmemleak_free_part(__va(addr), size);
@@ -472,7 +403,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
end = PFN_DOWN(addr + size);
mark_bootmem(start, end, 0, 0);
-#endif
}
/**
@@ -489,17 +419,12 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
-#ifdef CONFIG_NO_BOOTMEM
- panic("no bootmem");
- return 0;
-#else
unsigned long start, end;
start = PFN_DOWN(physaddr);
end = PFN_UP(physaddr + size);
return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
-#endif
}
/**
@@ -515,20 +440,14 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
int __init reserve_bootmem(unsigned long addr, unsigned long size,
int flags)
{
-#ifdef CONFIG_NO_BOOTMEM
- panic("no bootmem");
- return 0;
-#else
unsigned long start, end;
start = PFN_DOWN(addr);
end = PFN_UP(addr + size);
return mark_bootmem(start, end, 1, flags);
-#endif
}
-#ifndef CONFIG_NO_BOOTMEM
int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
int flags)
{
@@ -685,33 +604,12 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
#endif
return NULL;
}
-#endif
static void * __init ___alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal,
unsigned long limit)
{
-#ifdef CONFIG_NO_BOOTMEM
- void *ptr;
-
- if (WARN_ON_ONCE(slab_is_available()))
- return kzalloc(size, GFP_NOWAIT);
-
-restart:
-
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
-
- if (ptr)
- return ptr;
-
- if (goal != 0) {
- goal = 0;
- goto restart;
- }
-
- return NULL;
-#else
bootmem_data_t *bdata;
void *region;
@@ -737,7 +635,6 @@ restart:
}
return NULL;
-#endif
}
/**
@@ -758,10 +655,6 @@ void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
{
unsigned long limit = 0;
-#ifdef CONFIG_NO_BOOTMEM
- limit = -1UL;
-#endif
-
return ___alloc_bootmem_nopanic(size, align, goal, limit);
}
@@ -798,14 +691,9 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
{
unsigned long limit = 0;
-#ifdef CONFIG_NO_BOOTMEM
- limit = -1UL;
-#endif
-
return ___alloc_bootmem(size, align, goal, limit);
}
-#ifndef CONFIG_NO_BOOTMEM
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
@@ -822,7 +710,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
return ___alloc_bootmem(size, align, goal, limit);
}
-#endif
/**
* __alloc_bootmem_node - allocate boot memory from a specific node
@@ -842,24 +729,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- void *ptr;
-
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- goal, -1ULL);
- if (ptr)
- return ptr;
-
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
- goal, -1ULL);
-#else
- ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
-#endif
-
- return ptr;
+ return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
}
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -880,13 +753,8 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
unsigned long new_goal;
new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- new_goal, -1ULL);
-#else
ptr = alloc_bootmem_core(pgdat->bdata, size, align,
new_goal, 0);
-#endif
if (ptr)
return ptr;
}
@@ -907,16 +775,6 @@ void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
void * __init alloc_bootmem_section(unsigned long size,
unsigned long section_nr)
{
-#ifdef CONFIG_NO_BOOTMEM
- unsigned long pfn, goal, limit;
-
- pfn = section_nr_to_pfn(section_nr);
- goal = pfn << PAGE_SHIFT;
- limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
-
- return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
- SMP_CACHE_BYTES, goal, limit);
-#else
bootmem_data_t *bdata;
unsigned long pfn, goal, limit;
@@ -926,7 +784,6 @@ void * __init alloc_bootmem_section(unsigned long size,
bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
-#endif
}
#endif
@@ -938,16 +795,11 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
- goal, -1ULL);
-#else
ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
if (ptr)
return ptr;
ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
-#endif
if (ptr)
return ptr;
@@ -995,21 +847,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
unsigned long align, unsigned long goal)
{
- void *ptr;
-
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-#ifdef CONFIG_NO_BOOTMEM
- ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ return ___alloc_bootmem_node(pgdat->bdata, size, align,
goal, ARCH_LOW_ADDRESS_LIMIT);
- if (ptr)
- return ptr;
- ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
- goal, ARCH_LOW_ADDRESS_LIMIT);
-#else
- ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
- goal, ARCH_LOW_ADDRESS_LIMIT);
-#endif
- return ptr;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3e29781ee762..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
static inline struct page *alloc_hugepage_vma(int defrag,
struct vm_area_struct *vma,
- unsigned long haddr)
+ unsigned long haddr, int nd)
{
return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
- HPAGE_PMD_ORDER, vma, haddr);
+ HPAGE_PMD_ORDER, vma, haddr, nd);
}
#ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(khugepaged_enter(vma)))
return VM_FAULT_OOM;
page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr);
+ vma, haddr, numa_node_id());
if (unlikely(!page))
goto out;
if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
}
for (i = 0; i < HPAGE_PMD_NR; i++) {
- pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
- vma, address);
+ pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
+ vma, address, page_to_nid(page));
if (unlikely(!pages[i] ||
mem_cgroup_newpage_charge(pages[i], mm,
GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow())
new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
- vma, haddr);
+ vma, haddr, numa_node_id());
else
new_page = NULL;
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma,
+ int node)
{
pgd_t *pgd;
pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
#ifndef CONFIG_NUMA
VM_BUG_ON(!*hpage);
new_page = *hpage;
+ if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+ up_read(&mm->mmap_sem);
+ return;
+ }
#else
VM_BUG_ON(*hpage);
/*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
* mmap_sem in read mode is good idea also to allow greater
* scalability.
*/
- new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+ new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+ node);
if (unlikely(!new_page)) {
up_read(&mm->mmap_sem);
*hpage = ERR_PTR(-ENOMEM);
return;
}
-#endif
if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
up_read(&mm->mmap_sem);
put_page(new_page);
return;
}
+#endif
/* after allocating the hugepage upgrade to mmap_sem write mode */
up_read(&mm->mmap_sem);
@@ -1919,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
struct page *page;
unsigned long _address;
spinlock_t *ptl;
+ int node = -1;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
@@ -1949,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page))
goto out_unmap;
+ /*
+ * Chose the node of the first page. This could
+ * be more sophisticated and look at more pages,
+ * but isn't for now.
+ */
+ if (node == -1)
+ node = page_to_nid(page);
VM_BUG_ON(PageCompound(page));
if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
goto out_unmap;
@@ -1965,7 +1979,7 @@ out_unmap:
pte_unmap_unlock(pte, ptl);
if (ret)
/* collapse_huge_page will return with the mmap_sem released */
- collapse_huge_page(mm, address, hpage, vma);
+ collapse_huge_page(mm, address, hpage, vma, node);
out:
return ret;
}
diff --git a/mm/memory.c b/mm/memory.c
index 8e8c18324863..5823698c2b71 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2648,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping,
details.last_index = ULONG_MAX;
details.i_mmap_lock = &mapping->i_mmap_lock;
+ mutex_lock(&mapping->unmap_mutex);
spin_lock(&mapping->i_mmap_lock);
/* Protect against endless unmapping loops */
@@ -2664,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping,
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
spin_unlock(&mapping->i_mmap_lock);
+ mutex_unlock(&mapping->unmap_mutex);
}
EXPORT_SYMBOL(unmap_mapping_range);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 368fc9d23610..b53ec99f1428 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
}
/* Return a zonelist indicated by gfp for node representing a mempolicy */
-static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
+static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
+ int nd)
{
- int nd = numa_node_id();
-
switch (policy->mode) {
case MPOL_PREFERRED:
if (!(policy->flags & MPOL_F_LOCAL))
@@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
zl = node_zonelist(interleave_nid(*mpol, vma, addr,
huge_page_shift(hstate_vma(vma))), gfp_flags);
} else {
- zl = policy_zonelist(gfp_flags, *mpol);
+ zl = policy_zonelist(gfp_flags, *mpol, numa_node_id());
if ((*mpol)->mode == MPOL_BIND)
*nodemask = &(*mpol)->v.nodes;
}
@@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*/
struct page *
alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr)
+ unsigned long addr, int node)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
struct zonelist *zl;
@@ -1830,13 +1829,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
unsigned nid;
- nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
+ nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
mpol_cond_put(pol);
page = alloc_page_interleave(gfp, order, nid);
put_mems_allowed();
return page;
}
- zl = policy_zonelist(gfp, pol);
+ zl = policy_zonelist(gfp, pol, node);
if (unlikely(mpol_needs_cond_ref(pol))) {
/*
* slow path: ref counted shared policy
@@ -1892,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
else
page = __alloc_pages_nodemask(gfp, order,
- policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+ policy_zonelist(gfp, pol, numa_node_id()),
+ policy_nodemask(gfp, pol));
put_mems_allowed();
return page;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 766115253807..352de555626c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1287,14 +1287,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
return -EPERM;
/* Find the mm_struct */
- read_lock(&tasklist_lock);
+ rcu_read_lock();
task = pid ? find_task_by_vpid(pid) : current;
if (!task) {
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return -ESRCH;
}
mm = get_task_mm(task);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
if (!mm)
return -EINVAL;
diff --git a/mm/mremap.c b/mm/mremap.c
index 9925b6391b80..1de98d492ddc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -94,9 +94,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
*/
mapping = vma->vm_file->f_mapping;
spin_lock(&mapping->i_mmap_lock);
- if (new_vma->vm_truncate_count &&
- new_vma->vm_truncate_count != vma->vm_truncate_count)
- new_vma->vm_truncate_count = 0;
+ new_vma->vm_truncate_count = 0;
}
/*
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
new file mode 100644
index 000000000000..e2bdb07079ce
--- /dev/null
+++ b/mm/nobootmem.c
@@ -0,0 +1,435 @@
+/*
+ * bootmem - A boot-time physical memory allocator and configurator
+ *
+ * Copyright (C) 1999 Ingo Molnar
+ * 1999 Kanoj Sarcar, SGI
+ * 2008 Johannes Weiner
+ *
+ * Access to this subsystem has to be serialized externally (which is true
+ * for the boot process anyway).
+ */
+#include <linux/init.h>
+#include <linux/pfn.h>
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/kmemleak.h>
+#include <linux/range.h>
+#include <linux/memblock.h>
+
+#include <asm/bug.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+
+#include "internal.h"
+
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data __refdata contig_page_data;
+EXPORT_SYMBOL(contig_page_data);
+#endif
+
+unsigned long max_low_pfn;
+unsigned long min_low_pfn;
+unsigned long max_pfn;
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * If we have booted due to a crash, max_pfn will be a very low value. We need
+ * to know the amount of memory that the previous kernel used.
+ */
+unsigned long saved_max_pfn;
+#endif
+
+/*
+ * Look up a free physical range with find_memory_core_early(), zero it,
+ * reserve it in memblock under the "BOOTMEM" tag and register it with
+ * kmemleak.  @limit is clamped to memblock.current_limit first.
+ *
+ * Returns the virtual address of the range, or NULL if none was found.
+ */
+static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
+					       u64 goal, u64 limit)
+{
+	u64 phys;
+	void *virt;
+
+	if (memblock.current_limit < limit)
+		limit = memblock.current_limit;
+
+	phys = find_memory_core_early(nid, size, align, goal, limit);
+	if (phys == MEMBLOCK_ERROR)
+		return NULL;
+
+	virt = phys_to_virt(phys);
+	memset(virt, 0, size);
+	memblock_x86_reserve_range(phys, phys + size, "BOOTMEM");
+
+	/* min_count of 0: bootmem blocks must never be reported as leaks. */
+	kmemleak_alloc(virt, size, 0, 0);
+
+	return virt;
+}
+
+/*
+ * free_bootmem_late - hand a physical range straight to the page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Meant for the window in which the bootmem allocator is already gone but
+ * the system is still initializing.  No bootmem metadata is touched; the
+ * range is narrowed with PFN_UP()/PFN_DOWN() and each remaining page is
+ * freed at order 0, bumping totalram_pages once per page.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+	unsigned long pfn = PFN_UP(addr);
+	unsigned long last = PFN_DOWN(addr + size);
+
+	kmemleak_free_part(__va(addr), size);
+
+	while (pfn < last) {
+		__free_pages_bootmem(pfn_to_page(pfn), 0);
+		totalram_pages++;
+		pfn++;
+	}
+}
+
+/*
+ * Release the PFN range [start, end) to the page allocator.
+ *
+ * Pages before the first and after the last BITS_PER_LONG-aligned PFN are
+ * freed one at a time (order 0); the aligned middle part is freed in
+ * chunks of BITS_PER_LONG pages (order ilog2(BITS_PER_LONG)).
+ */
+static void __init __free_pages_memory(unsigned long start, unsigned long end)
+{
+	/* PFNs are unsigned long; an int cursor would truncate large values. */
+	unsigned long pfn;
+	unsigned long start_aligned, end_aligned;
+	int order = ilog2(BITS_PER_LONG);
+
+	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
+	end_aligned = end & ~(BITS_PER_LONG - 1);
+
+	if (end_aligned <= start_aligned) {
+		/* No aligned chunk fits: free the whole range page by page. */
+		for (pfn = start; pfn < end; pfn++)
+			__free_pages_bootmem(pfn_to_page(pfn), 0);
+
+		return;
+	}
+
+	/* Unaligned head. */
+	for (pfn = start; pfn < start_aligned; pfn++)
+		__free_pages_bootmem(pfn_to_page(pfn), 0);
+
+	/* Aligned middle, one BITS_PER_LONG-page chunk at a time. */
+	for (pfn = start_aligned; pfn < end_aligned; pfn += BITS_PER_LONG)
+		__free_pages_bootmem(pfn_to_page(pfn), order);
+
+	/* Unaligned tail. */
+	for (pfn = end_aligned; pfn < end; pfn++)
+		__free_pages_bootmem(pfn_to_page(pfn), 0);
+}
+
+/*
+ * Fetch the ranges that get_free_all_memory_range() reports for @nodeid
+ * and pass each of them to __free_pages_memory().
+ *
+ * Returns the summed length (end - start) of all ranges handed over.
+ */
+unsigned long __init free_all_memory_core_early(int nodeid)
+{
+	struct range *ranges = NULL;
+	unsigned long total = 0;
+	int nr, idx;
+
+	nr = get_free_all_memory_range(&ranges, nodeid);
+
+	for (idx = 0; idx < nr; idx++) {
+		u64 first = ranges[idx].start;
+		u64 last = ranges[idx].end;
+
+		total += last - first;
+		__free_pages_memory(first, last);
+	}
+
+	return total;
+}
+
+/**
+ * free_all_bootmem_node - release a node's free pages to the buddy allocator
+ * @pgdat: node to be released
+ *
+ * In this implementation only register_page_bootmem_info_node() is called
+ * for @pgdat.  No pages are released here, so the return value is always 0.
+ */
+unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
+{
+ register_page_bootmem_info_node(pgdat);
+
+ /* free_all_memory_core_early(MAX_NUMNODES) will be called later */
+ return 0;
+}
+
+/**
+ * free_all_bootmem - release free pages to the buddy allocator
+ *
+ * Returns the value reported by free_all_memory_core_early(MAX_NUMNODES).
+ */
+unsigned long __init free_all_bootmem(void)
+{
+ /*
+ * Use MAX_NUMNODES instead of NODE_DATA(0)->node_id: in some
+ * configurations Node0 has no RAM installed and low RAM lives on
+ * Node1.  Passing MAX_NUMNODES makes sure all ranges in
+ * early_node_map[] are used, not only those belonging to Node0.
+ */
+ return free_all_memory_core_early(MAX_NUMNODES);
+}
+
+/**
+ * free_bootmem_node - mark a page range as usable
+ * @pgdat: node the range resides on (not referenced by this implementation)
+ * @physaddr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * The range must reside completely on the specified node.
+ *
+ * The range is dropped from kmemleak tracking and then released with
+ * memblock_x86_free_range().
+ */
+void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+ unsigned long size)
+{
+ kmemleak_free_part(__va(physaddr), size);
+ memblock_x86_free_range(physaddr, physaddr + size);
+}
+
+/**
+ * free_bootmem - mark a page range as usable
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * Partial pages will be considered reserved and left as they are.
+ *
+ * The range must be contiguous but may span node boundaries.
+ *
+ * The range is dropped from kmemleak tracking and then released with
+ * memblock_x86_free_range().
+ */
+void __init free_bootmem(unsigned long addr, unsigned long size)
+{
+ kmemleak_free_part(__va(addr), size);
+ memblock_x86_free_range(addr, addr + size);
+}
+
+/*
+ * Allocate @size bytes from any node below @limit, preferring addresses
+ * at or above @goal.  If the attempt with a non-zero @goal fails, it is
+ * repeated once with the goal cleared.  When the slab allocator is already
+ * up, a one-time warning is issued and kzalloc() is used instead.
+ *
+ * Returns NULL on failure.
+ */
+static void * __init ___alloc_bootmem_nopanic(unsigned long size,
+					unsigned long align,
+					unsigned long goal,
+					unsigned long limit)
+{
+	void *mem;
+
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc(size, GFP_NOWAIT);
+
+	for (;;) {
+		mem = __alloc_memory_core_early(MAX_NUMNODES, size, align,
+						goal, limit);
+		if (mem || !goal)
+			return mem;
+
+		/* Retry without a preferred start address. */
+		goal = 0;
+	}
+}
+
+/**
+ * __alloc_bootmem_nopanic - allocate boot memory without panicking
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * If @goal cannot be honoured it is dropped and the allocation is retried
+ * without it.  No upper address limit is requested (-1UL is passed on).
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * Returns NULL on failure.
+ */
+void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
+					unsigned long goal)
+{
+	return ___alloc_bootmem_nopanic(size, align, goal, -1UL);
+}
+
+/*
+ * Panicking front end of ___alloc_bootmem_nopanic(): logs the failed
+ * request size at KERN_ALERT and calls panic() when no memory was found.
+ */
+static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
+					unsigned long goal, unsigned long limit)
+{
+	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
+
+	if (!mem) {
+		/* Whoops, we cannot satisfy the allocation request. */
+		printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
+		panic("Out of memory");
+	}
+
+	return mem;
+}
+
+/**
+ * __alloc_bootmem - allocate boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * If @goal cannot be honoured it is dropped and the allocation is retried
+ * without it.  No upper address limit is requested (-1UL is passed on).
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem(unsigned long size, unsigned long align,
+				unsigned long goal)
+{
+	return ___alloc_bootmem(size, align, goal, -1UL);
+}
+
+/**
+ * __alloc_bootmem_node - allocate boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The allocation is first attempted on @pgdat's node and, if that fails,
+ * on any node in the system (MAX_NUMNODES).  Both attempts pass the same
+ * @goal; this implementation does not retry with the goal dropped.
+ *
+ * If the slab allocator is already available, a one-time warning is
+ * issued and kzalloc_node() is used instead.
+ *
+ * NOTE(review): unlike __alloc_bootmem(), this implementation does not
+ * panic; it returns NULL when both attempts fail.
+ */
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, -1ULL);
+ if (ptr)
+ return ptr;
+
+ return __alloc_memory_core_early(MAX_NUMNODES, size, align,
+ goal, -1ULL);
+}
+
+/*
+ * Like __alloc_bootmem_node(), but when MAX_DMA32_PFN is defined and the
+ * node extends beyond it, first try to place the allocation at or above
+ * MAX_DMA32_PFN.  Falls back to __alloc_bootmem_node() with the caller's
+ * @goal if that attempt is not made or fails.
+ */
+void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+#ifdef MAX_DMA32_PFN
+ unsigned long end_pfn;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ /* raise the goal to MAX_DMA32_PFN if the node reaches beyond it */
+ end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+ /*
+ * NOTE(review): 128 >> (20 - PAGE_SHIFT) evaluates to 0 whenever
+ * PAGE_SHIFT <= 12; if a 128MB margin expressed in pages was
+ * intended this would need to be a left shift - confirm.
+ */
+ if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
+ (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
+ void *ptr;
+ unsigned long new_goal;
+
+ new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ new_goal, -1ULL);
+ if (ptr)
+ return ptr;
+ }
+#endif
+
+ return __alloc_bootmem_node(pgdat, size, align, goal);
+
+}
+
+#ifdef CONFIG_SPARSEMEM
+/**
+ * alloc_bootmem_section - allocate boot memory from a specific section
+ * @size: size of the request in bytes
+ * @section_nr: sparse map section to allocate from
+ *
+ * The search window runs from the first PFN of @section_nr up to the first
+ * PFN of the following section; the alignment is SMP_CACHE_BYTES.
+ *
+ * Return NULL on failure.
+ */
+void * __init alloc_bootmem_section(unsigned long size,
+				    unsigned long section_nr)
+{
+	unsigned long start_pfn = section_nr_to_pfn(section_nr);
+	unsigned long next_pfn = section_nr_to_pfn(section_nr + 1);
+
+	return __alloc_memory_core_early(early_pfn_to_nid(start_pfn), size,
+					 SMP_CACHE_BYTES,
+					 start_pfn << PAGE_SHIFT,
+					 next_pfn << PAGE_SHIFT);
+}
+#endif
+
+/*
+ * Non-panicking node allocation: try @pgdat's node first, then fall back
+ * to __alloc_bootmem_nopanic(), which may use any node.  Once the slab
+ * allocator is available a one-time warning is issued and kzalloc_node()
+ * is used instead.  Returns NULL on failure.
+ */
+void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
+				unsigned long align, unsigned long goal)
+{
+	void *mem;
+
+	if (WARN_ON_ONCE(slab_is_available()))
+		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+	mem = __alloc_memory_core_early(pgdat->node_id, size, align,
+					goal, -1ULL);
+	if (!mem)
+		mem = __alloc_bootmem_nopanic(size, align, goal);
+
+	return mem;
+}
+
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
+#endif
+
+/**
+ * __alloc_bootmem_low - allocate low boot memory
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The allocation is limited to addresses below ARCH_LOW_ADDRESS_LIMIT.
+ *
+ * The goal is dropped if it can not be satisfied and the allocation will
+ * fall back to memory below @goal.
+ *
+ * Allocation may happen on any node in the system.
+ *
+ * The function panics if the request can not be satisfied.
+ */
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
+ unsigned long goal)
+{
+ return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
+}
+
+/**
+ * __alloc_bootmem_low_node - allocate low boot memory from a specific node
+ * @pgdat: node to allocate from
+ * @size: size of the request in bytes
+ * @align: alignment of the region
+ * @goal: preferred starting address of the region
+ *
+ * The allocation is limited to addresses below ARCH_LOW_ADDRESS_LIMIT.
+ * It is first attempted on @pgdat's node and, if that fails, on any node
+ * in the system (MAX_NUMNODES).  Both attempts pass the same @goal; this
+ * implementation does not retry with the goal dropped.
+ *
+ * If the slab allocator is already available, a one-time warning is
+ * issued and kzalloc_node() is used instead.
+ *
+ * NOTE(review): unlike __alloc_bootmem_low(), this implementation does
+ * not panic; it returns NULL when both attempts fail.
+ */
+void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
+ unsigned long align, unsigned long goal)
+{
+ void *ptr;
+
+ if (WARN_ON_ONCE(slab_is_available()))
+ return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
+
+ ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+ if (ptr)
+ return ptr;
+
+ return __alloc_memory_core_early(MAX_NUMNODES, size, align,
+ goal, ARCH_LOW_ADDRESS_LIMIT);
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61e312e..bd7625676a64 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3699,13 +3699,45 @@ void __init free_bootmem_with_active_regions(int nid,
}
#ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * Reverse iterator support: return the index of the last early_node_map[]
+ * entry that belongs to @nid, or -1 if there is none.
+ * Note: nid == MAX_NUMNODES matches the last entry regardless of node.
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+	int idx = nr_nodemap_entries;
+
+	while (--idx >= 0) {
+		if (nid == MAX_NUMNODES || early_node_map[idx].nid == nid)
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Reverse iterator support: return the index of the closest
+ * early_node_map[] entry before @index that belongs to @nid, or -1 if
+ * there is none.
+ * Note: nid == MAX_NUMNODES matches the previous entry regardless of node.
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+	while (--index >= 0) {
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+	}
+
+	return -1;
+}
+
+/*
+ * Walk the early_node_map[] indices matching @nid from the last matching
+ * entry down to the first; the loop ends when the helpers return -1.
+ */
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+ for (i = last_active_region_index_in_nid(nid); i != -1; \
+ i = previous_active_region_index_in_nid(i, nid))
+
u64 __init find_memory_core_early(int nid, u64 size, u64 align,
u64 goal, u64 limit)
{
int i;
/* Need to go over early_node_map to find out good range for node */
- for_each_active_range_index_in_nid(i, nid) {
+ for_each_active_range_index_in_nid_reverse(i, nid) {
u64 addr;
u64 ei_start, ei_last;
u64 final_start, final_end;
@@ -3748,34 +3780,6 @@ int __init add_from_early_node_map(struct range *range, int az,
return nr_range;
}
-#ifdef CONFIG_NO_BOOTMEM
-void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
- u64 goal, u64 limit)
-{
- void *ptr;
- u64 addr;
-
- if (limit > memblock.current_limit)
- limit = memblock.current_limit;
-
- addr = find_memory_core_early(nid, size, align, goal, limit);
-
- if (addr == MEMBLOCK_ERROR)
- return NULL;
-
- ptr = phys_to_virt(addr);
- memset(ptr, 0, size);
- memblock_x86_reserve_range(addr, addr + size, "BOOTMEM");
- /*
- * The min_count is set to 0 so that bootmem allocated blocks
- * are never reported as leaks.
- */
- kmemleak_alloc(ptr, size, 0, 0);
- return ptr;
-}
-#endif
-
-
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
{
int i;
@@ -4809,15 +4813,6 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
dma_reserve = new_dma_reserve;
}
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-#ifndef CONFIG_NO_BOOTMEM
- .bdata = &bootmem_node_data[0]
-#endif
- };
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
void __init free_area_init(unsigned long *zones_size)
{
free_area_init_node(0, zones_size,
@@ -5376,10 +5371,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) {
unsigned long check = pfn + iter;
- if (!pfn_valid_within(check)) {
- iter++;
+ if (!pfn_valid_within(check))
continue;
- }
+
page = pfn_to_page(check);
if (!page_count(page)) {
if (PageBuddy(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index f21f4a1d6a1c..941bf82e8961 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
int referenced = 0;
- /*
- * Don't want to elevate referenced for mlocked page that gets this far,
- * in order that it progresses to try_to_unmap and is moved to the
- * unevictable list.
- */
- if (vma->vm_flags & VM_LOCKED) {
- *mapcount = 0; /* break early from loop */
- *vm_flags |= VM_LOCKED;
- goto out;
- }
-
- /* Pretend the page is referenced if the task has the
- swap token and is in the middle of a page fault. */
- if (mm != current->mm && has_swap_token(mm) &&
- rwsem_is_locked(&mm->mmap_sem))
- referenced++;
-
if (unlikely(PageTransHuge(page))) {
pmd_t *pmd;
spin_lock(&mm->page_table_lock);
+ /*
+ * rmap might return false positives; we must filter
+ * these out using page_check_address_pmd().
+ */
pmd = page_check_address_pmd(page, mm, address,
PAGE_CHECK_ADDRESS_PMD_FLAG);
- if (pmd && !pmd_trans_splitting(*pmd) &&
- pmdp_clear_flush_young_notify(vma, address, pmd))
+ if (!pmd) {
+ spin_unlock(&mm->page_table_lock);
+ goto out;
+ }
+
+ if (vma->vm_flags & VM_LOCKED) {
+ spin_unlock(&mm->page_table_lock);
+ *mapcount = 0; /* break early from loop */
+ *vm_flags |= VM_LOCKED;
+ goto out;
+ }
+
+ /* go ahead even if the pmd is pmd_trans_splitting() */
+ if (pmdp_clear_flush_young_notify(vma, address, pmd))
referenced++;
spin_unlock(&mm->page_table_lock);
} else {
pte_t *pte;
spinlock_t *ptl;
+ /*
+ * rmap might return false positives; we must filter
+ * these out using page_check_address().
+ */
pte = page_check_address(page, mm, address, &ptl, 0);
if (!pte)
goto out;
+ if (vma->vm_flags & VM_LOCKED) {
+ pte_unmap_unlock(pte, ptl);
+ *mapcount = 0; /* break early from loop */
+ *vm_flags |= VM_LOCKED;
+ goto out;
+ }
+
if (ptep_clear_flush_young_notify(vma, address, pte)) {
/*
* Don't treat a reference through a sequentially read
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
pte_unmap_unlock(pte, ptl);
}
+ /* Pretend the page is referenced if the task has the
+ swap token and is in the middle of a page fault. */
+ if (mm != current->mm && has_swap_token(mm) &&
+ rwsem_is_locked(&mm->mmap_sem))
+ referenced++;
+
(*mapcount)--;
if (referenced)
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c990602..41f82bb59eec 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1843,8 +1843,9 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
inode = shmem_get_inode(dir->i_sb, dir, mode, dev, VM_NORESERVE);
if (inode) {
- error = security_inode_init_security(inode, dir, NULL, NULL,
- NULL);
+ error = security_inode_init_security(inode, dir,
+ &dentry->d_name, NULL,
+ NULL, NULL);
if (error) {
if (error != -EOPNOTSUPP) {
iput(inode);
@@ -1983,8 +1984,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
if (!inode)
return -ENOSPC;
- error = security_inode_init_security(inode, dir, NULL, NULL,
- NULL);
+ error = security_inode_init_security(inode, dir, &dentry->d_name, NULL,
+ NULL, NULL);
if (error) {
if (error != -EOPNOTSUPP) {
iput(inode);
@@ -2144,8 +2145,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
{
struct inode *inode = dentry->d_inode;
- if (*len < 3)
+ if (*len < 3) {
+ *len = 3;
return 255;
+ }
if (inode_unhashed(inode)) {
/* Unfortunately insert_inode_hash is not idempotent,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 07a458d72fa8..0341c5700e34 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1940,7 +1940,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = -EINVAL;
if (S_ISBLK(inode->i_mode)) {
- bdev = I_BDEV(inode);
+ bdev = bdgrab(I_BDEV(inode));
error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
sys_swapon);
if (error < 0) {
diff --git a/mm/truncate.c b/mm/truncate.c
index 49feb46e77b8..d64296be00d3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -225,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
next = start;
while (next <= end &&
pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
pgoff_t page_index = page->index;
@@ -247,6 +248,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
unlock_page(page);
}
pagevec_release(&pvec);
+ mem_cgroup_uncharge_end();
cond_resched();
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17497d0cd8b9..6771ea70bfe7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1841,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone,
if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION))
return false;
- /*
- * If we failed to reclaim and have scanned the full list, stop.
- * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far
- * faster but obviously would be less likely to succeed
- * allocation. If this is desirable, use GFP_REPEAT to decide
- * if both reclaimed and scanned should be checked or just
- * reclaimed
- */
- if (!nr_reclaimed && !nr_scanned)
- return false;
+ /* Consider stopping depending on scan and reclaim activity */
+ if (sc->gfp_mask & __GFP_REPEAT) {
+ /*
+ * For __GFP_REPEAT allocations, stop reclaiming if the
+ * full LRU list has been scanned and we are still failing
+ * to reclaim pages. This full LRU scan is potentially
+ * expensive but a __GFP_REPEAT caller really wants to succeed
+ */
+ if (!nr_reclaimed && !nr_scanned)
+ return false;
+ } else {
+ /*
+ * For non-__GFP_REPEAT allocations which can presumably
+ * fail without consequence, stop if we failed to reclaim
+ * any pages from the last SWAP_CLUSTER_MAX number of
+ * pages that were scanned. This will return to the
+ * caller faster at the risk reclaim/compaction and
+ * the resulting allocation attempt fails
+ */
+ if (!nr_reclaimed)
+ return false;
+ }
/*
* If we have not reclaimed enough pages for compaction and the
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 198a640d53a6..a0874cc1f718 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
util.o \
protocol.o \
trans_fd.o \
+ trans_common.o \
9pnet_virtio-objs := \
trans_virtio.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index a848bca9fbff..347ec0cd2718 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -229,10 +229,23 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
return ERR_PTR(-ENOMEM);
}
init_waitqueue_head(req->wq);
- req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
- req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
- GFP_KERNEL);
+ if ((c->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) {
+ int alloc_msize = min(c->msize, 4096);
+ req->tc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+ GFP_KERNEL);
+ req->tc->capacity = alloc_msize;
+ req->rc = kmalloc(sizeof(struct p9_fcall)+alloc_msize,
+ GFP_KERNEL);
+ req->rc->capacity = alloc_msize;
+ } else {
+ req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+ GFP_KERNEL);
+ req->tc->capacity = c->msize;
+ req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize,
+ GFP_KERNEL);
+ req->rc->capacity = c->msize;
+ }
if ((!req->tc) || (!req->rc)) {
printk(KERN_ERR "Couldn't grow tag array\n");
kfree(req->tc);
@@ -243,9 +256,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
return ERR_PTR(-ENOMEM);
}
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
- req->tc->capacity = c->msize;
req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
- req->rc->capacity = c->msize;
}
p9pdu_reset(req->tc);
@@ -443,6 +454,7 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
{
int8_t type;
int err;
+ int ecode;
err = p9_parse_header(req->rc, NULL, &type, NULL, 0);
if (err) {
@@ -450,36 +462,53 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
return err;
}
- if (type == P9_RERROR || type == P9_RLERROR) {
- int ecode;
-
- if (!p9_is_proto_dotl(c)) {
- char *ename;
+ if (type != P9_RERROR && type != P9_RLERROR)
+ return 0;
- err = p9pdu_readf(req->rc, c->proto_version, "s?d",
- &ename, &ecode);
- if (err)
- goto out_err;
+ if (!p9_is_proto_dotl(c)) {
+ char *ename;
+
+ if (req->tc->pbuf_size) {
+ /* Handle user buffers */
+ size_t len = req->rc->size - req->rc->offset;
+ if (req->tc->pubuf) {
+ /* User Buffer */
+ err = copy_from_user(
+ &req->rc->sdata[req->rc->offset],
+ req->tc->pubuf, len);
+ if (err) {
+ err = -EFAULT;
+ goto out_err;
+ }
+ } else {
+ /* Kernel Buffer */
+ memmove(&req->rc->sdata[req->rc->offset],
+ req->tc->pkbuf, len);
+ }
+ }
+ err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
+ if (err)
+ goto out_err;
- if (p9_is_proto_dotu(c))
- err = -ecode;
+ if (p9_is_proto_dotu(c))
+ err = -ecode;
- if (!err || !IS_ERR_VALUE(err)) {
- err = p9_errstr2errno(ename, strlen(ename));
+ if (!err || !IS_ERR_VALUE(err)) {
+ err = p9_errstr2errno(ename, strlen(ename));
- P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename);
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode,
+ ename);
- kfree(ename);
- }
- } else {
- err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
- err = -ecode;
-
- P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ kfree(ename);
}
+ } else {
+ err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode);
+ err = -ecode;
+
+ P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
+ }
- } else
- err = 0;
return err;
@@ -1191,6 +1220,27 @@ error:
}
EXPORT_SYMBOL(p9_client_fsync);
+/*
+ * Send a TSYNCFS request for @fid to the server and release the request
+ * once the RPC has completed.
+ *
+ * Returns 0 on success or the error carried by the failed RPC.
+ */
+int p9_client_sync_fs(struct p9_fid *fid)
+{
+	struct p9_client *clnt = fid->clnt;
+	struct p9_req_t *req;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYNC_FS fid %d\n", fid->fid);
+
+	req = p9_client_rpc(clnt, P9_TSYNCFS, "d", fid->fid);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYNCFS fid %d\n", fid->fid);
+	p9_free_req(clnt, req);
+
+	return 0;
+}
+EXPORT_SYMBOL(p9_client_sync_fs);
+
int p9_client_clunk(struct p9_fid *fid)
{
int err;
@@ -1270,7 +1320,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
if (count < rsize)
rsize = count;
- req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize);
+ /* Don't bother with zerocopy for small IO (<= 1024 bytes) */
+ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+ req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
+ rsize, data, udata);
+ } else {
+ req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset,
+ rsize);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1284,13 +1342,15 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
- if (data) {
- memmove(data, dataptr, count);
- } else {
- err = copy_to_user(udata, dataptr, count);
- if (err) {
- err = -EFAULT;
- goto free_and_error;
+ if (!req->tc->pbuf_size) {
+ if (data) {
+ memmove(data, dataptr, count);
+ } else {
+ err = copy_to_user(udata, dataptr, count);
+ if (err) {
+ err = -EFAULT;
+ goto free_and_error;
+ }
}
}
p9_free_req(clnt, req);
@@ -1323,12 +1383,21 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
if (count < rsize)
rsize = count;
- if (data)
- req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
- rsize, data);
- else
- req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
- rsize, udata);
+
+ /* Don't bother with zerocopy for small IO (<= 1024 bytes) */
+ if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqE", fid->fid, offset,
+ rsize, data, udata);
+ } else {
+
+ if (data)
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid,
+ offset, rsize, data);
+ else
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid,
+ offset, rsize, udata);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1716,7 +1785,14 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
if (count < rsize)
rsize = count;
- req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
+ if ((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
+ P9_TRANS_PREF_PAYLOAD_SEP) {
+ req = p9_client_rpc(clnt, P9_TREADDIR, "dqF", fid->fid,
+ offset, rsize, data);
+ } else {
+ req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid,
+ offset, rsize);
+ }
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto error;
@@ -1730,7 +1806,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
- if (data)
+ if (!req->tc->pbuf_size && data)
memmove(data, dataptr, count);
p9_free_req(clnt, req);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 1e308f210928..2ce515b859b3 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -114,6 +114,26 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
return size - len;
}
+/*
+ * Attach a separate payload to @pdu instead of copying it: record the
+ * kernel buffer (@kdata), the user buffer (@udata) and the payload @size
+ * in the PDU.  No data is copied here.  Triggers BUG_ON() if the PDU
+ * already holds more than P9_IOHDRSZ bytes.  Always returns 0.
+ */
+static size_t
+pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
+ size_t size)
+{
+ BUG_ON(pdu->size > P9_IOHDRSZ);
+ pdu->pubuf = (char __user *)udata;
+ pdu->pkbuf = (char *)kdata;
+ pdu->pbuf_size = size;
+ return 0;
+}
+
+/*
+ * Readdir variant of the separate-payload setup: record the kernel buffer
+ * (@kdata) and its @size in @pdu without copying any data.  Triggers
+ * BUG_ON() if the PDU already holds more than P9_READDIRHDRSZ bytes.
+ * Always returns 0.
+ */
+static size_t
+pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
+{
+ BUG_ON(pdu->size > P9_READDIRHDRSZ);
+ pdu->pkbuf = (char *)kdata;
+ pdu->pbuf_size = size;
+ return 0;
+}
+
/*
b - int8_t
w - int16_t
@@ -445,6 +465,25 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
errcode = -EFAULT;
}
break;
+ case 'E':{
+ int32_t cnt = va_arg(ap, int32_t);
+ const char *k = va_arg(ap, const void *);
+ const char *u = va_arg(ap, const void *);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ cnt);
+ if (!errcode && pdu_write_urw(pdu, k, u, cnt))
+ errcode = -EFAULT;
+ }
+ break;
+ case 'F':{
+ int32_t cnt = va_arg(ap, int32_t);
+ const char *k = va_arg(ap, const void *);
+ errcode = p9pdu_writef(pdu, proto_version, "d",
+ cnt);
+ if (!errcode && pdu_write_readdir(pdu, k, cnt))
+ errcode = -EFAULT;
+ }
+ break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
@@ -579,6 +618,7 @@ EXPORT_SYMBOL(p9stat_read);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
{
+ pdu->id = type;
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
@@ -606,6 +646,10 @@ void p9pdu_reset(struct p9_fcall *pdu)
{
pdu->offset = 0;
pdu->size = 0;
+ pdu->private = NULL;
+ pdu->pubuf = NULL;
+ pdu->pkbuf = NULL;
+ pdu->pbuf_size = 0;
}
int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
new file mode 100644
index 000000000000..d62b9aa58df8
--- /dev/null
+++ b/net/9p/trans_common.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/scatterlist.h>
+#include "trans_common.h"
+
+/**
+ * p9_release_req_pages - Release pages after the transaction.
+ * @rpinfo: page information (struct trans_rpage_info) stored with the PDU
+ *
+ * Drops one reference on each recorded page, stopping at the first NULL
+ * entry or once @rpinfo->rp_nr_pages pages have been released.
+ * @rpinfo->rp_nr_pages is decremented for every page released.
+ */
+void
+p9_release_req_pages(struct trans_rpage_info *rpinfo)
+{
+	int i = 0;
+
+	/*
+	 * Test the remaining count before looking at rp_data[i], so that no
+	 * slot beyond the recorded pages is ever read.
+	 */
+	while (rpinfo->rp_nr_pages > 0 && rpinfo->rp_data[i]) {
+		put_page(rpinfo->rp_data[i]);
+		rpinfo->rp_nr_pages--;
+		i++;
+	}
+}
+EXPORT_SYMBOL(p9_release_req_pages);
+
+/**
+ * p9_nr_pages - Return number of pages needed to accommodate the payload.
+ * @req: request whose tc->pubuf and tc->pbuf_size describe the payload
+ *
+ * Counts every page touched by the byte range starting at tc->pubuf and
+ * spanning tc->pbuf_size bytes, including partially covered pages.
+ */
+int
+p9_nr_pages(struct p9_req_t *req)
+{
+	/* Keep page indices at full width; an int would truncate them. */
+	unsigned long buf = (unsigned long)req->tc->pubuf;
+	unsigned long first_page = buf >> PAGE_SHIFT;
+	unsigned long end_page = (buf + req->tc->pbuf_size + PAGE_SIZE - 1)
+					>> PAGE_SHIFT;
+
+	return end_page - first_page;
+}
+EXPORT_SYMBOL(p9_nr_pages);
+
+/**
+ * p9_payload_gup - Translates user buffer into kernel pages and
+ *	pins them either for read/write through get_user_pages_fast().
+ * @req: Request to be sent to server.
+ * @pdata_off: data offset into the first page after translation (gup).
+ * @pdata_len: Total length of the IO. gup may not return requested # of pages.
+ * @nr_pages: number of pages to accommodate the payload
+ * @rw: Indicates if the pages are for read or write.
+ *
+ * The pinned pages are stored in the struct trans_rpage_info found in
+ * req->tc->private, and their number in its rp_nr_pages field.
+ *
+ * Returns 0 on success, -EIO if get_user_pages_fast() fails.
+ */
+int
+p9_payload_gup(struct p9_req_t *req, size_t *pdata_off, int *pdata_len,
+		int nr_pages, u8 rw)
+{
+	uint32_t first_page_bytes = 0;
+	/* Signed, so that a negative error from gup is actually detected. */
+	int pdata_mapped_pages;
+	struct trans_rpage_info *rpinfo;
+
+	*pdata_off = (size_t)req->tc->pubuf & (PAGE_SIZE-1);
+
+	if (*pdata_off)
+		first_page_bytes = min((PAGE_SIZE - *pdata_off),
+				       req->tc->pbuf_size);
+
+	rpinfo = req->tc->private;
+	pdata_mapped_pages = get_user_pages_fast((unsigned long)req->tc->pubuf,
+			nr_pages, rw, &rpinfo->rp_data[0]);
+
+	if (pdata_mapped_pages < 0) {
+		printk(KERN_ERR "get_user_pages_fast failed:%d udata:%p "
+				"nr_pages:%d\n", pdata_mapped_pages,
+				req->tc->pubuf, nr_pages);
+		return -EIO;
+	}
+	rpinfo->rp_nr_pages = pdata_mapped_pages;
+
+	/* Fewer pages than requested may be pinned; size the IO to match. */
+	if (*pdata_off) {
+		*pdata_len = first_page_bytes;
+		*pdata_len += min((req->tc->pbuf_size - *pdata_len),
+				((size_t)pdata_mapped_pages - 1) << PAGE_SHIFT);
+	} else {
+		*pdata_len = min(req->tc->pbuf_size,
+				(size_t)pdata_mapped_pages << PAGE_SHIFT);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(p9_payload_gup);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
new file mode 100644
index 000000000000..76309223bb02
--- /dev/null
+++ b/net/9p/trans_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/* TRUE when get_fs() is not KERNEL_DS, i.e. we are in user context */
+#define P9_IS_USER_CONTEXT (!segment_eq(get_fs(), KERNEL_DS))
+
+/**
+ * struct trans_rpage_info - To store mapped page information in PDU.
+ * @rp_alloc: Set if this structure was allocated separately (and must be
+ *	freed by the transport); clear if it reuses unused space in the PDU.
+ * @rp_nr_pages: Number of mapped pages
+ * @rp_data: Array of page pointers
+ */
+struct trans_rpage_info {
+ u8 rp_alloc;
+ int rp_nr_pages;
+ struct page *rp_data[0];
+};
+
+void p9_release_req_pages(struct trans_rpage_info *);
+int p9_payload_gup(struct p9_req_t *, size_t *, int *, int, u8);
+int p9_nr_pages(struct p9_req_t *);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 078eb162d9bf..a30471e51740 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -153,10 +153,11 @@ struct p9_conn {
unsigned long wsched;
};
+static void p9_poll_workfn(struct work_struct *work);
+
static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
-static struct workqueue_struct *p9_mux_wq;
-static struct task_struct *p9_poll_task;
+static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
static void p9_mux_poll_stop(struct p9_conn *m)
{
@@ -384,7 +385,7 @@ static void p9_read_work(struct work_struct *work)
if (n & POLLIN) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
- queue_work(p9_mux_wq, &m->rq);
+ schedule_work(&m->rq);
} else
clear_bit(Rworksched, &m->wsched);
} else
@@ -497,7 +498,7 @@ static void p9_write_work(struct work_struct *work)
if (n & POLLOUT) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
} else
clear_bit(Wworksched, &m->wsched);
} else
@@ -516,15 +517,14 @@ static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
container_of(wait, struct p9_poll_wait, wait);
struct p9_conn *m = pwait->conn;
unsigned long flags;
- DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);
spin_lock_irqsave(&p9_poll_lock, flags);
if (list_empty(&m->poll_pending_link))
list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);
spin_unlock_irqrestore(&p9_poll_lock, flags);
- /* perform the default wake up operation */
- return default_wake_function(&dummy_wait, mode, sync, key);
+ schedule_work(&p9_poll_work);
+ return 1;
}
/**
@@ -629,7 +629,7 @@ static void p9_poll_mux(struct p9_conn *m)
P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m);
if (!test_and_set_bit(Rworksched, &m->wsched)) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m);
- queue_work(p9_mux_wq, &m->rq);
+ schedule_work(&m->rq);
}
}
@@ -639,7 +639,7 @@ static void p9_poll_mux(struct p9_conn *m)
if ((m->wsize || !list_empty(&m->unsent_req_list)) &&
!test_and_set_bit(Wworksched, &m->wsched)) {
P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m);
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
}
}
}
@@ -677,7 +677,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
n = p9_fd_poll(m->client, NULL);
if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
- queue_work(p9_mux_wq, &m->wq);
+ schedule_work(&m->wq);
return 0;
}
@@ -1047,12 +1047,12 @@ static struct p9_trans_module p9_fd_trans = {
*
*/
-static int p9_poll_proc(void *a)
+static void p9_poll_workfn(struct work_struct *work)
{
unsigned long flags;
P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current);
- repeat:
+
spin_lock_irqsave(&p9_poll_lock, flags);
while (!list_empty(&p9_poll_pending_list)) {
struct p9_conn *conn = list_first_entry(&p9_poll_pending_list,
@@ -1067,35 +1067,11 @@ static int p9_poll_proc(void *a)
}
spin_unlock_irqrestore(&p9_poll_lock, flags);
- set_current_state(TASK_INTERRUPTIBLE);
- if (list_empty(&p9_poll_pending_list)) {
- P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n");
- schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- if (!kthread_should_stop())
- goto repeat;
-
P9_DPRINTK(P9_DEBUG_TRANS, "finish\n");
- return 0;
}
int p9_trans_fd_init(void)
{
- p9_mux_wq = create_workqueue("v9fs");
- if (!p9_mux_wq) {
- printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n");
- return -ENOMEM;
- }
-
- p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll");
- if (IS_ERR(p9_poll_task)) {
- destroy_workqueue(p9_mux_wq);
- printk(KERN_WARNING "v9fs: mux: creating poll task failed\n");
- return PTR_ERR(p9_poll_task);
- }
-
v9fs_register_trans(&p9_tcp_trans);
v9fs_register_trans(&p9_unix_trans);
v9fs_register_trans(&p9_fd_trans);
@@ -1105,10 +1081,8 @@ int p9_trans_fd_init(void)
void p9_trans_fd_exit(void)
{
- kthread_stop(p9_poll_task);
+ flush_work_sync(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
-
- destroy_workqueue(p9_mux_wq);
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index c8f3f72ab20e..9b550ed9c711 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -45,6 +45,7 @@
#include <linux/scatterlist.h>
#include <linux/virtio.h>
#include <linux/virtio_9p.h>
+#include "trans_common.h"
#define VIRTQUEUE_NUM 128
@@ -155,6 +156,14 @@ static void req_done(struct virtqueue *vq)
rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
req->status = REQ_STATUS_RCVD;
+ if (req->tc->private) {
+ struct trans_rpage_info *rp = req->tc->private;
+ /*Release pages */
+ p9_release_req_pages(rp);
+ if (rp->rp_alloc)
+ kfree(rp);
+ req->tc->private = NULL;
+ }
p9_client_cb(chan->client, req);
} else {
spin_unlock_irqrestore(&chan->lock, flags);
@@ -203,6 +212,38 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
}
/**
+ * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
+ * this takes a list of pages.
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @pdata_off: Offset into the first page
+ * @**pdata: a list of pages to add into sg.
+ * @count: amount of data to pack into the scatter/gather list
+ */
+static int
+pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
+ struct page **pdata, int count)
+{
+ int s;
+ int i = 0;
+ int index = start;
+
+ if (pdata_off) {
+ s = min((int)(PAGE_SIZE - pdata_off), count);
+ sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
+ count -= s;
+ }
+
+ while (count) {
+ BUG_ON(index > limit);
+ s = min((int)PAGE_SIZE, count);
+ sg_set_page(&sg[index++], pdata[i++], s, 0);
+ count -= s;
+ }
+ return index-start;
+}
+
+/**
* p9_virtio_request - issue a request
* @client: client instance issuing the request
* @req: request to be issued
@@ -212,22 +253,97 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
- int in, out;
+ int in, out, inp, outp;
struct virtio_chan *chan = client->trans;
char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
unsigned long flags;
- int err;
+ size_t pdata_off = 0;
+ struct trans_rpage_info *rpinfo = NULL;
+ int err, pdata_len = 0;
P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
req_retry:
req->status = REQ_STATUS_SENT;
+ if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
+ int nr_pages = p9_nr_pages(req);
+ int rpinfo_size = sizeof(struct trans_rpage_info) +
+ sizeof(struct page *) * nr_pages;
+
+ if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
+ /* We can use sdata */
+ req->tc->private = req->tc->sdata + req->tc->size;
+ rpinfo = (struct trans_rpage_info *)req->tc->private;
+ rpinfo->rp_alloc = 0;
+ } else {
+ req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
+ if (!req->tc->private) {
+ P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
+ "private kmalloc returned NULL");
+ return -ENOMEM;
+ }
+ rpinfo = (struct trans_rpage_info *)req->tc->private;
+ rpinfo->rp_alloc = 1;
+ }
+
+ err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
+ req->tc->id == P9_TREAD ? 1 : 0);
+ if (err < 0) {
+ if (rpinfo->rp_alloc)
+ kfree(rpinfo);
+ return err;
+ }
+ }
+
spin_lock_irqsave(&chan->lock, flags);
+
+ /* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
- req->tc->size);
- in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata,
- client->msize);
+ req->tc->size);
+
+ if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
+ /* We have additional write payload buffer to take care */
+ if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+ outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
+ pdata_off, rpinfo->rp_data, pdata_len);
+ } else {
+ char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+ req->tc->pkbuf;
+ outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
+ req->tc->pbuf_size);
+ }
+ out += outp;
+ }
+
+ /* Handle in VirtIO ring buffers */
+ if (req->tc->pbuf_size &&
+ ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
+ /*
+ * Take care of additional Read payload.
+ * 11 is the read/write header = PDU Header(7) + IO Size (4).
+ * Arrange in such a way that server places header in the
+ * alloced memory and payload onto the user buffer.
+ */
+ inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
+ /*
+ * Running executables in the filesystem may result in
+ * a read request with kernel buffer as opposed to user buffer.
+ */
+ if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
+ in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
+ pdata_off, rpinfo->rp_data, pdata_len);
+ } else {
+ char *pbuf = req->tc->pubuf ? req->tc->pubuf :
+ req->tc->pkbuf;
+ in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
+ pbuf, req->tc->pbuf_size);
+ }
+ in += inp;
+ } else {
+ in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
+ client->msize);
+ }
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
if (err < 0) {
@@ -246,6 +362,8 @@ req_retry:
P9_DPRINTK(P9_DEBUG_TRANS,
"9p debug: "
"virtio rpc add_buf returned failure");
+ if (rpinfo && rpinfo->rp_alloc)
+ kfree(rpinfo);
return -EIO;
}
}
@@ -448,6 +566,7 @@ static struct p9_trans_module p9_virtio_trans = {
.request = p9_virtio_request,
.cancel = p9_virtio_cancel,
.maxsize = PAGE_SIZE*16,
+ .pref = P9_TRANS_PREF_PAYLOAD_SEP,
.def = 0,
.owner = THIS_MODULE,
};
diff --git a/net/Makefile b/net/Makefile
index a3330ebe2c53..a51d9465e628 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,9 +19,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
-ifneq ($(CONFIG_IPV6),)
-obj-y += ipv6/
-endif
+obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7550abb0c96a..675614e38e14 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -859,6 +859,7 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
result = L2CAP_CR_SEC_BLOCK;
else
result = L2CAP_CR_BAD_PSM;
+ sk->sk_state = BT_DISCONN;
rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 2575c2db6404..d7b9af4703d0 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -727,7 +727,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
break;
}
+ tty_unlock();
schedule();
+ tty_lock();
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&dev->wait, &wait);
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 9190ae462cb4..6dee7bf648a9 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -6,6 +6,7 @@ config BRIDGE
tristate "802.1d Ethernet Bridging"
select LLC
select STP
+ depends on IPV6 || IPV6=n
---help---
If you say Y here, then your Linux box will be able to act as an
Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 6f6d8e1b776f..88e4aa9cb1f9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
if (is_multicast_ether_addr(dest)) {
mdst = br_mdb_get(br, skb);
if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
- if ((mdst && !hlist_unhashed(&mdst->mglist)) ||
+ if ((mdst && mdst->mglist) ||
br_multicast_is_router(br))
skb2 = skb;
br_multicast_forward(mdst, skb, skb2);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index f701a21acb34..030a002ff8ee 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,10 +37,9 @@
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
+static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
{
- if (ipv6_addr_is_multicast(addr) &&
- IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
+ if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
return 1;
return 0;
}
@@ -232,8 +231,7 @@ static void br_multicast_group_expired(unsigned long data)
if (!netif_running(br->dev) || timer_pending(&mp->timer))
goto out;
- if (!hlist_unhashed(&mp->mglist))
- hlist_del_init(&mp->mglist);
+ mp->mglist = false;
if (mp->ports)
goto out;
@@ -276,7 +274,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
del_timer(&p->query_timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && hlist_unhashed(&mp->mglist) &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -436,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
eth = eth_hdr(skb);
memcpy(eth->h_source, br->dev->dev_addr, 6);
- ipv6_eth_mc_map(group, eth->h_dest);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -448,8 +445,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->payload_len = htons(8 + sizeof(*mldq));
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
- ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
+ ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
+ &ip6h->saddr);
ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+ ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
hopopt[0] = IPPROTO_ICMPV6; /* next hdr */
@@ -528,7 +527,7 @@ static void br_multicast_group_query_expired(unsigned long data)
struct net_bridge *br = mp->br;
spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || hlist_unhashed(&mp->mglist) ||
+ if (!netif_running(br->dev) || !mp->mglist ||
mp->queries_sent >= br->multicast_last_member_count)
goto out;
@@ -719,7 +718,7 @@ static int br_multicast_add_group(struct net_bridge *br,
goto err;
if (!port) {
- hlist_add_head(&mp->mglist, &br->mglist);
+ mp->mglist = true;
mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -781,11 +780,11 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
{
struct br_ip br_group;
- if (ipv6_is_local_multicast(group))
+ if (!ipv6_is_transient_multicast(group))
return 0;
ipv6_addr_copy(&br_group.u.ip6, group);
- br_group.proto = htons(ETH_P_IP);
+ br_group.proto = htons(ETH_P_IPV6);
return br_multicast_add_group(br, port, &br_group);
}
@@ -1014,18 +1013,19 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
nsrcs = skb_header_pointer(skb,
len + offsetof(struct mld2_grec,
- grec_mca),
+ grec_nsrcs),
sizeof(_nsrcs), &_nsrcs);
if (!nsrcs)
return -EINVAL;
if (!pskb_may_pull(skb,
len + sizeof(*grec) +
- sizeof(struct in6_addr) * (*nsrcs)))
+ sizeof(struct in6_addr) * ntohs(*nsrcs)))
return -EINVAL;
grec = (struct mld2_grec *)(skb->data + len);
- len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs);
+ len += sizeof(*grec) +
+ sizeof(struct in6_addr) * ntohs(*nsrcs);
/* We treat these as MLDv1 reports for now. */
switch (grec->grec_type) {
@@ -1165,7 +1165,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay *= br->multicast_last_member_count;
- if (!hlist_unhashed(&mp->mglist) &&
+ if (mp->mglist &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1177,7 +1177,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
- mod_timer(&mp->timer, now + max_delay);
+ mod_timer(&p->timer, now + max_delay);
}
out:
@@ -1236,7 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
max_delay *= br->multicast_last_member_count;
- if (!hlist_unhashed(&mp->mglist) &&
+ if (mp->mglist &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
try_to_del_timer_sync(&mp->timer) >= 0))
@@ -1248,7 +1248,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0)
- mod_timer(&mp->timer, now + max_delay);
+ mod_timer(&p->timer, now + max_delay);
}
out:
@@ -1283,7 +1283,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (!hlist_unhashed(&mp->mglist) &&
+ if (mp->mglist &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
@@ -1341,7 +1341,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
{
struct br_ip br_group;
- if (ipv6_is_local_multicast(group))
+ if (!ipv6_is_transient_multicast(group))
return;
ipv6_addr_copy(&br_group.u.ip6, group);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 84aac7734bfc..4e1b620b6be6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -84,13 +84,13 @@ struct net_bridge_port_group {
struct net_bridge_mdb_entry
{
struct hlist_node hlist[2];
- struct hlist_node mglist;
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
struct rcu_head rcu;
struct timer_list timer;
struct timer_list query_timer;
struct br_ip addr;
+ bool mglist;
u32 queries_sent;
};
@@ -238,7 +238,6 @@ struct net_bridge
spinlock_t multicast_lock;
struct net_bridge_mdb_htable __rcu *mdb;
struct hlist_head router_list;
- struct hlist_head mglist;
struct timer_list multicast_router_timer;
struct timer_list multicast_querier_timer;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dff633d62e5b..05f357828a2f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
{
struct kvec iov = {buf, len};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
- return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+ r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
}
/*
@@ -264,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
size_t kvlen, size_t len, int more)
{
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
if (more)
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
- return kernel_sendmsg(sock, &msg, iov, kvlen, len);
+ r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
}
@@ -328,7 +336,6 @@ static void reset_connection(struct ceph_connection *con)
ceph_msg_put(con->out_msg);
con->out_msg = NULL;
}
- con->out_keepalive_pending = false;
con->in_seq = 0;
con->in_seq_acked = 0;
}
@@ -847,6 +854,8 @@ static int write_partial_msg_pages(struct ceph_connection *con)
(msg->pages || msg->pagelist || msg->bio || in_trail))
kunmap(page);
+ if (ret == -EAGAIN)
+ ret = 0;
if (ret <= 0)
goto out;
@@ -1238,8 +1247,6 @@ static int process_connect(struct ceph_connection *con)
con->auth_retry);
if (con->auth_retry == 2) {
con->error_msg = "connect authorization failure";
- reset_connection(con);
- set_bit(CLOSED, &con->state);
return -1;
}
con->auth_retry = 1;
@@ -1705,14 +1712,6 @@ more:
/* open the socket first? */
if (con->sock == NULL) {
- /*
- * if we were STANDBY and are reconnecting _this_
- * connection, bump connect_seq now. Always bump
- * global_seq.
- */
- if (test_and_clear_bit(STANDBY, &con->state))
- con->connect_seq++;
-
prepare_write_banner(msgr, con);
prepare_write_connect(msgr, con, 1);
prepare_read_banner(con);
@@ -1737,16 +1736,12 @@ more_kvec:
if (con->out_skip) {
ret = write_partial_skip(con);
if (ret <= 0)
- goto done;
- if (ret < 0) {
- dout("try_write write_partial_skip err %d\n", ret);
- goto done;
- }
+ goto out;
}
if (con->out_kvec_left) {
ret = write_partial_kvec(con);
if (ret <= 0)
- goto done;
+ goto out;
}
/* msg pages? */
@@ -1761,11 +1756,11 @@ more_kvec:
if (ret == 1)
goto more_kvec; /* we need to send the footer, too! */
if (ret == 0)
- goto done;
+ goto out;
if (ret < 0) {
dout("try_write write_partial_msg_pages err %d\n",
ret);
- goto done;
+ goto out;
}
}
@@ -1789,10 +1784,9 @@ do_next:
/* Nothing to do! */
clear_bit(WRITE_PENDING, &con->state);
dout("try_write nothing else to write.\n");
-done:
ret = 0;
out:
- dout("try_write done on %p\n", con);
+ dout("try_write done on %p ret %d\n", con, ret);
return ret;
}
@@ -1821,19 +1815,17 @@ more:
dout("try_read connecting\n");
ret = read_partial_banner(con);
if (ret <= 0)
- goto done;
- if (process_banner(con) < 0) {
- ret = -1;
goto out;
- }
+ ret = process_banner(con);
+ if (ret < 0)
+ goto out;
}
ret = read_partial_connect(con);
if (ret <= 0)
- goto done;
- if (process_connect(con) < 0) {
- ret = -1;
goto out;
- }
+ ret = process_connect(con);
+ if (ret < 0)
+ goto out;
goto more;
}
@@ -1848,7 +1840,7 @@ more:
dout("skipping %d / %d bytes\n", skip, -con->in_base_pos);
ret = ceph_tcp_recvmsg(con->sock, buf, skip);
if (ret <= 0)
- goto done;
+ goto out;
con->in_base_pos += ret;
if (con->in_base_pos)
goto more;
@@ -1859,7 +1851,7 @@ more:
*/
ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1);
if (ret <= 0)
- goto done;
+ goto out;
dout("try_read got tag %d\n", (int)con->in_tag);
switch (con->in_tag) {
case CEPH_MSGR_TAG_MSG:
@@ -1870,7 +1862,7 @@ more:
break;
case CEPH_MSGR_TAG_CLOSE:
set_bit(CLOSED, &con->state); /* fixme */
- goto done;
+ goto out;
default:
goto bad_tag;
}
@@ -1882,13 +1874,12 @@ more:
case -EBADMSG:
con->error_msg = "bad crc";
ret = -EIO;
- goto out;
+ break;
case -EIO:
con->error_msg = "io error";
- goto out;
- default:
- goto done;
+ break;
}
+ goto out;
}
if (con->in_tag == CEPH_MSGR_TAG_READY)
goto more;
@@ -1898,15 +1889,13 @@ more:
if (con->in_tag == CEPH_MSGR_TAG_ACK) {
ret = read_partial_ack(con);
if (ret <= 0)
- goto done;
+ goto out;
process_ack(con);
goto more;
}
-done:
- ret = 0;
out:
- dout("try_read done on %p\n", con);
+ dout("try_read done on %p ret %d\n", con, ret);
return ret;
bad_tag:
@@ -1951,7 +1940,24 @@ static void con_work(struct work_struct *work)
work.work);
mutex_lock(&con->mutex);
+ if (test_and_clear_bit(BACKOFF, &con->state)) {
+ dout("con_work %p backing off\n", con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+ round_jiffies_relative(con->delay))) {
+ dout("con_work %p backoff %lu\n", con, con->delay);
+ mutex_unlock(&con->mutex);
+ return;
+ } else {
+ con->ops->put(con);
+ dout("con_work %p FAILED to back off %lu\n", con,
+ con->delay);
+ }
+ }
+ if (test_bit(STANDBY, &con->state)) {
+ dout("con_work %p STANDBY\n", con);
+ goto done;
+ }
if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
dout("con_work CLOSED\n");
con_close_socket(con);
@@ -2008,10 +2014,12 @@ static void ceph_fault(struct ceph_connection *con)
/* Requeue anything that hasn't been acked */
list_splice_init(&con->out_sent, &con->out_queue);
- /* If there are no messages in the queue, place the connection
- * in a STANDBY state (i.e., don't try to reconnect just yet). */
- if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
- dout("fault setting STANDBY\n");
+ /* If there are no messages queued or keepalive pending, place
+ * the connection in a STANDBY state */
+ if (list_empty(&con->out_queue) &&
+ !test_bit(KEEPALIVE_PENDING, &con->state)) {
+ dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
+ clear_bit(WRITE_PENDING, &con->state);
set_bit(STANDBY, &con->state);
} else {
/* retry after a delay. */
@@ -2019,11 +2027,24 @@ static void ceph_fault(struct ceph_connection *con)
con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL)
con->delay *= 2;
- dout("fault queueing %p delay %lu\n", con, con->delay);
con->ops->get(con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
- round_jiffies_relative(con->delay)) == 0)
+ round_jiffies_relative(con->delay))) {
+ dout("fault queued %p delay %lu\n", con, con->delay);
+ } else {
con->ops->put(con);
+ dout("fault failed to queue %p delay %lu, backoff\n",
+ con, con->delay);
+ /*
+ * In many cases we see a socket state change
+ * while con_work is running and end up
+ * queuing (non-delayed) work, such that we
+ * can't backoff with a delay. Set a flag so
+ * that when con_work restarts we schedule the
+ * delay then.
+ */
+ set_bit(BACKOFF, &con->state);
+ }
}
out_unlock:
@@ -2094,6 +2115,19 @@ void ceph_messenger_destroy(struct ceph_messenger *msgr)
}
EXPORT_SYMBOL(ceph_messenger_destroy);
+/*
+ * Bring a connection back from STANDBY, if it was in that state, and
+ * bump its connect_seq.  Does nothing when STANDBY was not set.
+ */
+static void clear_standby(struct ceph_connection *con)
+{
+	/* not coming back from STANDBY? then there is nothing to do */
+	if (!test_and_clear_bit(STANDBY, &con->state))
+		return;
+
+	mutex_lock(&con->mutex);
+	dout("clear_standby %p and ++connect_seq\n", con);
+	con->connect_seq++;
+	/* neither flag is expected to be set at this point */
+	WARN_ON(test_bit(WRITE_PENDING, &con->state));
+	WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
+	mutex_unlock(&con->mutex);
+}
+
/*
* Queue up an outgoing message on the given connection.
*/
@@ -2126,6 +2160,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
/* if there wasn't anything waiting to send before, queue
* new work */
+ clear_standby(con);
if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
queue_con(con);
}
@@ -2191,6 +2226,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
*/
void ceph_con_keepalive(struct ceph_connection *con)
{
+ dout("con_keepalive %p\n", con);
+ clear_standby(con);
if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
test_and_set_bit(WRITE_PENDING, &con->state) == 0)
queue_con(con);
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 1a040e64c69f..cd9c21df87d1 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -16,22 +16,30 @@ struct page **ceph_get_direct_page_vector(const char __user *data,
int num_pages, bool write_page)
{
struct page **pages;
- int rc;
+ int got = 0;
+ int rc = 0;
pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS);
if (!pages)
return ERR_PTR(-ENOMEM);
down_read(&current->mm->mmap_sem);
- rc = get_user_pages(current, current->mm, (unsigned long)data,
- num_pages, write_page, 0, pages, NULL);
+ while (got < num_pages) {
+ rc = get_user_pages(current, current->mm,
+ (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
+ num_pages - got, write_page, 0, pages + got, NULL);
+ if (rc < 0)
+ break;
+ BUG_ON(rc == 0);
+ got += rc;
+ }
up_read(&current->mm->mmap_sem);
- if (rc < num_pages)
+ if (rc < 0)
goto fail;
return pages;
fail:
- ceph_put_page_vector(pages, rc > 0 ? rc : 0, false);
+ ceph_put_page_vector(pages, got, false);
return ERR_PTR(rc);
}
EXPORT_SYMBOL(ceph_get_direct_page_vector);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e726cb47ed7..6561021d22d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1114,13 +1114,21 @@ EXPORT_SYMBOL(netdev_bonding_change);
void dev_load(struct net *net, const char *name)
{
struct net_device *dev;
+ int no_module;
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
rcu_read_unlock();
- if (!dev && capable(CAP_NET_ADMIN))
- request_module("%s", name);
+ no_module = !dev;
+ if (no_module && capable(CAP_NET_ADMIN))
+ no_module = request_module("netdev-%s", name);
+ if (no_module && capable(CAP_SYS_MODULE)) {
+ if (!request_module("%s", name))
+ pr_err("Loading kernel module for a network device "
+"with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s "
+"instead\n", name);
+ }
}
EXPORT_SYMBOL(dev_load);
@@ -1280,10 +1288,13 @@ static int __dev_close_many(struct list_head *head)
static int __dev_close(struct net_device *dev)
{
+ int retval;
LIST_HEAD(single);
list_add(&dev->unreg_list, &single);
- return __dev_close_many(&single);
+ retval = __dev_close_many(&single);
+ list_del(&single);
+ return retval;
}
int dev_close_many(struct list_head *head)
@@ -1325,7 +1336,7 @@ int dev_close(struct net_device *dev)
list_add(&dev->unreg_list, &single);
dev_close_many(&single);
-
+ list_del(&single);
return 0;
}
EXPORT_SYMBOL(dev_close);
@@ -5063,6 +5074,7 @@ static void rollback_registered(struct net_device *dev)
list_add(&dev->unreg_list, &single);
rollback_registered_many(&single);
+ list_del(&single);
}
unsigned long netdev_fix_features(unsigned long features, const char *name)
@@ -6216,6 +6228,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}
}
unregister_netdevice_many(&dev_kill_list);
+ list_del(&dev_kill_list);
rtnl_unlock();
}
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 508f9c18992f..133fd22ea287 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
list_for_each_entry(ha, &from_list->list, list) {
type = addr_type ? addr_type : ha->type;
- __hw_addr_del(to_list, ha->addr, addr_len, addr_type);
+ __hw_addr_del(to_list, ha->addr, addr_len, type);
}
}
EXPORT_SYMBOL(__hw_addr_del_multiple);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a9e7fc4c461f..b5bada92f637 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3321,7 +3321,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
pkt_dev->started_at);
ktime_t idle = ns_to_ktime(pkt_dev->idle_acc);
- p += sprintf(p, "OK: %llu(c%llu+d%llu) nsec, %llu (%dbyte,%dfrags)\n",
+ p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
(unsigned long long)ktime_to_us(elapsed),
(unsigned long long)ktime_to_us(ktime_sub(elapsed, idle)),
(unsigned long long)ktime_to_us(idle),
diff --git a/net/core/scm.c b/net/core/scm.c
index bbe454450801..4c1ef026d695 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -95,7 +95,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
int fd = fdp[i];
struct file *file;
- if (fd < 0 || !(file = fget(fd)))
+ if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
*fpp++ = file;
fpl->count++;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 6b03f561caec..c44348adba3b 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -626,6 +626,9 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb,
dcb->cmd = DCB_CMD_GAPP;
app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP);
+ if (!app_nest)
+ goto out_cancel;
+
ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype);
if (ret)
goto out_cancel;
@@ -1190,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb,
goto err;
}
- if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) {
+ if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
err = ops->ieee_setpfc(netdev, pfc);
if (err)
@@ -1613,6 +1616,10 @@ EXPORT_SYMBOL(dcb_getapp);
u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
{
struct dcb_app_type *itr;
+ struct dcb_app_type event;
+
+ memcpy(&event.name, dev->name, sizeof(event.name));
+ memcpy(&event.app, new, sizeof(event.app));
spin_lock(&dcb_lock);
/* Search for existing match and replace */
@@ -1644,7 +1651,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new)
}
out:
spin_unlock(&dcb_lock);
- call_dcbevent_notifiers(DCB_APP_EVENT, new);
+ call_dcbevent_notifiers(DCB_APP_EVENT, &event);
return 0;
}
EXPORT_SYMBOL(dcb_setapp);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8cde009e8b85..4222e7a654b0 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
/* Caller (dccp_v4_do_rcv) will send Reset */
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1;
+ } else if (sk->sk_state == DCCP_CLOSED) {
+ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+ return 1;
}
if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) {
@@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
switch (sk->sk_state) {
- case DCCP_CLOSED:
- dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- return 1;
-
case DCCP_REQUESTING:
queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
if (queued >= 0)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 739435a6af39..cfa7a5e1c5c9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
size_t result_len = 0;
const char *data = _data, *end, *opt;
- kenter("%%%d,%s,'%s',%zu",
- key->serial, key->description, data, datalen);
+ kenter("%%%d,%s,'%*.*s',%zu",
+ key->serial, key->description,
+ (int)datalen, (int)datalen, data, datalen);
if (datalen <= 1 || !data || data[datalen - 1] != '\0')
return -EINVAL;
@@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
seq_printf(m, ": %u", key->datalen);
}
+/*
+ * read the DNS data
+ * - the key's semaphore is read-locked
+ */
+static long dns_resolver_read(const struct key *key,
+ char __user *buffer, size_t buflen)
+{
+ if (key->type_data.x[0])
+ return key->type_data.x[0];
+
+ return user_read(key, buffer, buflen);
+}
+
struct key_type key_type_dns_resolver = {
.name = "dns_resolver",
.instantiate = dns_resolver_instantiate,
@@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = {
.revoke = user_revoke,
.destroy = user_destroy,
.describe = dns_resolver_describe,
- .read = user_read,
+ .read = dns_resolver_read,
};
static int __init init_dns_resolver(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 748cb5b337bd..036652c8166d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -670,7 +670,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
ifap = &ifa->ifa_next) {
if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
- ifa->ifa_address) {
+ ifa->ifa_local) {
break; /* found */
}
}
@@ -1030,6 +1030,21 @@ static inline bool inetdev_valid_mtu(unsigned mtu)
return mtu >= 68;
}
+static void inetdev_send_gratuitous_arp(struct net_device *dev,
+ struct in_device *in_dev)
+
+{
+ struct in_ifaddr *ifa = in_dev->ifa_list;
+
+ if (!ifa)
+ return;
+
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ ifa->ifa_local, dev,
+ ifa->ifa_local, NULL,
+ dev->dev_addr, NULL);
+}
+
/* Called only under RTNL semaphore */
static int inetdev_event(struct notifier_block *this, unsigned long event,
@@ -1082,18 +1097,13 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
}
ip_mc_up(in_dev);
/* fall through */
- case NETDEV_NOTIFY_PEERS:
case NETDEV_CHANGEADDR:
+ if (!IN_DEV_ARP_NOTIFY(in_dev))
+ break;
+ /* fall through */
+ case NETDEV_NOTIFY_PEERS:
/* Send gratuitous ARP to notify of link change */
- if (IN_DEV_ARP_NOTIFY(in_dev)) {
- struct in_ifaddr *ifa = in_dev->ifa_list;
-
- if (ifa)
- arp_send(ARPOP_REQUEST, ETH_P_ARP,
- ifa->ifa_address, dev,
- ifa->ifa_address, NULL,
- dev->dev_addr, NULL);
- }
+ inetdev_send_gratuitous_arp(dev, in_dev);
break;
case NETDEV_DOWN:
ip_mc_down(in_dev);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c5af909cf701..3c8dfa16614d 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -505,7 +505,9 @@ restart:
}
rcu_read_unlock();
+ local_bh_disable();
inet_twsk_deschedule(tw, twdr);
+ local_bh_enable();
inet_twsk_put(tw);
goto restart_rcu;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index eb68a0e34e49..d1d0e2c256fc 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -775,6 +775,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
.fl4_dst = dst,
.fl4_src = tiph->saddr,
.fl4_tos = RT_TOS(tos),
+ .proto = IPPROTO_GRE,
.fl_gre_key = tunnel->parms.o_key
};
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
@@ -1764,4 +1765,4 @@ module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
-MODULE_ALIAS("gre0");
+MODULE_ALIAS_NETDEV("gre0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 988f52fba54a..a5f58e7cbb26 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -913,4 +913,4 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("tunl0");
+MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 788a3e74834e..6ed6603c2f6d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2722,6 +2722,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
.destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
.default_mtu = ipv4_blackhole_default_mtu,
+ .default_advmss = ipv4_default_advmss,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82ebf4a3..65f6c0406245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+ if (tp->undo_marker && tp->undo_retrans &&
+ after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
state->reord = min(fack_count, state->reord);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 406f320336e6..dfa5beb0c1c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans++;
+ tp->undo_retrans += tcp_skb_pcount(skb);
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4f4483e697bd..e528a42a52be 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETDEV("ip6tnl0");
#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 09c88891a753..de338037a736 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -410,7 +410,7 @@ fallback:
if (p != NULL) {
sb_add(m, "%02x", *p++);
for (i = 1; i < len; i++)
- sb_add(m, ":%02x", p[i]);
+ sb_add(m, ":%02x", *p++);
}
sb_add(m, " ");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1c29f95695de..e7db7014e89f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -128,6 +128,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
.default_mtu = ip6_blackhole_default_mtu,
+ .default_advmss = ip6_default_advmss,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
};
@@ -738,8 +739,10 @@ restart:
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
- else
+ else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+ else
+ goto out2;
dst_release(&rt->dst);
rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -2556,14 +2559,16 @@ static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
- int delay = net->ipv6.sysctl.flush_delay;
- if (write) {
- proc_dointvec(ctl, write, buffer, lenp, ppos);
- fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
- return 0;
- } else
+ struct net *net;
+ int delay;
+ if (!write)
return -EINVAL;
+
+ net = (struct net *)ctl->extra1;
+ delay = net->ipv6.sysctl.flush_delay;
+ proc_dointvec(ctl, write, buffer, lenp, ppos);
+ fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+ return 0;
}
ctl_table ipv6_route_table_template[] = {
@@ -2650,6 +2655,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
if (table) {
table[0].data = &net->ipv6.sysctl.flush_delay;
+ table[0].extra1 = net;
table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8ce38f10a547..d2c16e10f650 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1290,4 +1290,4 @@ static int __init sit_init(void)
module_init(sit_init);
module_exit(sit_cleanup);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("sit0");
+MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8acba456744e..7a10a8d1b2d0 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
}
mutex_unlock(&local->iflist_mtx);
unregister_netdevice_many(&unreg_list);
+ list_del(&unreg_list);
}
static u32 ieee80211_idle_off(struct ieee80211_local *local,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 45fbb9e33746..c9ceb4d57ab0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1033,6 +1033,12 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
if (is_multicast_ether_addr(hdr->addr1))
return;
+ /*
+ * In case we receive frames after disassociation.
+ */
+ if (!sdata->u.mgd.associated)
+ return;
+
ieee80211_sta_reset_conn_monitor(sdata);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cf68700abffa..d036597aabbe 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1210,7 +1210,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
changed |= BSS_CHANGED_ASSOC;
+ mutex_lock(&sdata->u.mgd.mtx);
ieee80211_bss_info_change_notify(sdata, changed);
+ mutex_unlock(&sdata->u.mgd.mtx);
break;
case NL80211_IFTYPE_ADHOC:
changed |= BSS_CHANGED_IBSS;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 32fcbe290c04..4aa614b8a96a 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -133,6 +133,7 @@ unsigned int nf_iterate(struct list_head *head,
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
+repeat:
verdict = elem->hook(hook, skb, indev, outdev, okfn);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
@@ -145,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
#endif
if (verdict != NF_REPEAT)
return verdict;
- *i = (*i)->prev;
+ goto repeat;
}
}
return NF_ACCEPT;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 22f7ad5101ab..ba98e1308f3c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
- spin_lock(&dest->dst_lock);
+ spin_lock_bh(&dest->dst_lock);
ip_vs_dst_reset(dest);
- spin_unlock(&dest->dst_lock);
+ spin_unlock_bh(&dest->dst_lock);
if (add)
ip_vs_new_estimator(&dest->stats);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index b07393eab88e..91816998ed86 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister);
int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
{
+ if (pf >= ARRAY_SIZE(nf_loggers))
+ return -EINVAL;
mutex_lock(&nf_log_mutex);
if (__find_logger(pf, logger->name) == NULL) {
mutex_unlock(&nf_log_mutex);
@@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf);
void nf_log_unbind_pf(u_int8_t pf)
{
+ if (pf >= ARRAY_SIZE(nf_loggers))
+ return;
mutex_lock(&nf_log_mutex);
rcu_assign_pointer(nf_loggers[pf], NULL);
mutex_unlock(&nf_log_mutex);
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index 4d87befb04c0..474d621cbc2e 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -28,26 +28,23 @@ nf_tproxy_destructor(struct sk_buff *skb)
skb->destructor = NULL;
if (sk)
- nf_tproxy_put_sock(sk);
+ sock_put(sk);
}
/* consumes sk */
-int
+void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
{
- bool transparent = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_transparent :
- inet_sk(sk)->transparent;
-
- if (transparent) {
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = nf_tproxy_destructor;
- return 1;
- } else
- nf_tproxy_put_sock(sk);
-
- return 0;
+ /* assigning tw sockets complicates things; most
+ * skb->sk->X checks would have to test sk->sk_state first */
+ if (sk->sk_state == TCP_TIME_WAIT) {
+ inet_twsk_put(inet_twsk(sk));
+ return;
+ }
+
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = nf_tproxy_destructor;
}
EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 640678f47a2a..dcfd57eb9d02 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,6 +33,20 @@
#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h>
+static bool tproxy_sk_is_transparent(struct sock *sk)
+{
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ if (inet_sk(sk)->transparent)
+ return true;
+ sock_put(sk);
+ } else {
+ if (inet_twsk(sk)->tw_transparent)
+ return true;
+ inet_twsk_put(inet_twsk(sk));
+ }
+ return false;
+}
+
static inline __be32
tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
{
@@ -141,7 +155,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
skb->dev, NFT_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && nf_tproxy_assign_sock(skb, sk)) {
+ if (sk && tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -149,6 +163,8 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
iph->protocol, &iph->daddr, ntohs(hp->dest),
&laddr, ntohs(lport), skb->mark);
+
+ nf_tproxy_assign_sock(skb, sk);
return NF_ACCEPT;
}
@@ -306,7 +322,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
par->in, NFT_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && nf_tproxy_assign_sock(skb, sk)) {
+ if (sk && tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
@@ -314,6 +330,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
tproto, &iph->saddr, ntohs(hp->source),
laddr, ntohs(lport), skb->mark);
+
+ nf_tproxy_assign_sock(skb, sk);
return NF_ACCEPT;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 00d6ae838303..9cc46356b577 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -35,6 +35,15 @@
#include <net/netfilter/nf_conntrack.h>
#endif
+static void
+xt_socket_put_sk(struct sock *sk)
+{
+ if (sk->sk_state == TCP_TIME_WAIT)
+ inet_twsk_put(inet_twsk(sk));
+ else
+ sock_put(sk);
+}
+
static int
extract_icmp4_fields(const struct sk_buff *skb,
u8 *protocol,
@@ -164,7 +173,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
(sk->sk_state == TCP_TIME_WAIT &&
inet_twsk(sk)->tw_transparent));
- nf_tproxy_put_sock(sk);
+ xt_socket_put_sk(sk);
if (wildcard || !transparent)
sk = NULL;
@@ -298,7 +307,7 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
(sk->sk_state == TCP_TIME_WAIT &&
inet_twsk(sk)->tw_transparent));
- nf_tproxy_put_sock(sk);
+ xt_socket_put_sk(sk);
if (wildcard || !transparent)
sk = NULL;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 478181d53c55..1f924595bdef 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
int noblock = flags&MSG_DONTWAIT;
size_t copied;
struct sk_buff *skb, *data_skb;
- int err;
+ int err, ret;
if (flags&MSG_OOB)
return -EOPNOTSUPP;
@@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
skb_free_datagram(sk, skb);
- if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
- netlink_dump(sk);
+ if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
+ ret = netlink_dump(sk);
+ if (ret) {
+ sk->sk_err = ret;
+ sk->sk_error_report(sk);
+ }
+ }
scm_recv(sock, msg, siocb->scm, flags);
out:
@@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct netlink_callback *cb;
struct sock *sk;
struct netlink_sock *nlk;
+ int ret;
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
if (cb == NULL)
@@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
nlk->cb = cb;
mutex_unlock(nlk->cb_mutex);
- netlink_dump(sk);
+ ret = netlink_dump(sk);
+
sock_put(sk);
+ if (ret)
+ return ret;
+
/* We successfully started a dump, by returning -EINTR we
* signal not to send ACK even if it was requested.
*/
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 4123967d4d65..cce19f95c624 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -364,7 +364,6 @@ void rds_ib_exit(void)
rds_ib_sysctl_exit();
rds_ib_recv_exit();
rds_trans_unregister(&rds_ib_transport);
- rds_ib_fmr_exit();
}
struct rds_transport rds_ib_transport = {
@@ -400,13 +399,9 @@ int rds_ib_init(void)
INIT_LIST_HEAD(&rds_ib_devices);
- ret = rds_ib_fmr_init();
- if (ret)
- goto out;
-
ret = ib_register_client(&rds_ib_client);
if (ret)
- goto out_fmr_exit;
+ goto out;
ret = rds_ib_sysctl_init();
if (ret)
@@ -430,8 +425,6 @@ out_sysctl:
rds_ib_sysctl_exit();
out_ibreg:
rds_ib_unregister_client();
-out_fmr_exit:
- rds_ib_fmr_exit();
out:
return ret;
}
diff --git a/net/rds/ib.h b/net/rds/ib.h
index e34ad032b66d..4297d92788dc 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -307,8 +307,6 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
void rds_ib_sync_mr(void *trans_private, int dir);
void rds_ib_free_mr(void *trans_private, int invalidate);
void rds_ib_flush_mrs(void);
-int rds_ib_fmr_init(void);
-void rds_ib_fmr_exit(void);
/* ib_recv.c */
int rds_ib_recv_init(void);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 18a833c450c8..819c35a0d9cb 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -38,8 +38,6 @@
#include "ib.h"
#include "xlist.h"
-static struct workqueue_struct *rds_ib_fmr_wq;
-
static DEFINE_PER_CPU(unsigned long, clean_list_grace);
#define CLEAN_LIST_BUSY_BIT 0
@@ -307,7 +305,7 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
int err = 0, iter = 0;
if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
while (1) {
ibmr = rds_ib_reuse_fmr(pool);
@@ -696,24 +694,6 @@ out_nolock:
return ret;
}
-int rds_ib_fmr_init(void)
-{
- rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
- if (!rds_ib_fmr_wq)
- return -ENOMEM;
- return 0;
-}
-
-/*
- * By the time this is called all the IB devices should have been torn down and
- * had their pools freed. As each pool is freed its work struct is waited on,
- * so the pool flushing work queue should be idle by the time we get here.
- */
-void rds_ib_fmr_exit(void)
-{
- destroy_workqueue(rds_ib_fmr_wq);
-}
-
static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
{
struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
@@ -741,7 +721,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
/* If we've pinned too many pages, request a flush */
if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
if (invalidate) {
if (likely(!in_interrupt())) {
@@ -749,8 +729,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
} else {
/* We get here if the user created a MR marked
* as use_once and invalidate at the same time. */
- queue_delayed_work(rds_ib_fmr_wq,
- &pool->flush_worker, 10);
+ schedule_delayed_work(&pool->flush_worker, 10);
}
}
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 71f373c421bc..c47a511f203d 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -551,7 +551,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
if (conn->c_loopback
&& rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
- return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ scat = &rm->data.op_sg[sg];
+ ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
+ return ret;
}
/* FIXME we may overallocate here */
diff --git a/net/rds/loop.c b/net/rds/loop.c
index aeec1d483b17..bca6761a3ca2 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,15 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg,
unsigned int off)
{
+ struct scatterlist *sgp = &rm->data.op_sg[sg];
+ int ret = sizeof(struct rds_header) +
+ be32_to_cpu(rm->m_inc.i_hdr.h_len);
+
/* Do not send cong updates to loopback */
if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
- return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+ ret = min_t(int, ret, sgp->length - conn->c_xmit_data_off);
+ goto out;
}
BUG_ON(hdr_off || sg || off);
@@ -80,8 +85,8 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
NULL);
rds_inc_put(&rm->m_inc);
-
- return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
+out:
+ return ret;
}
/*
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 89315009bab1..1a2b0633fece 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
goto protocol_error;
}
+ case RXRPC_PACKET_TYPE_ACKALL:
case RXRPC_PACKET_TYPE_ACK:
/* ACK processing is done in process context */
read_lock_bh(&call->state_lock);
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 5ee16f0353fe..43ea7de2fc8e 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -25,6 +25,7 @@
#include <keys/user-type.h>
#include "ar-internal.h"
+static int rxrpc_vet_description_s(const char *);
static int rxrpc_instantiate(struct key *, const void *, size_t);
static int rxrpc_instantiate_s(struct key *, const void *, size_t);
static void rxrpc_destroy(struct key *);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL(key_type_rxrpc);
*/
struct key_type key_type_rxrpc_s = {
.name = "rxrpc_s",
+ .vet_description = rxrpc_vet_description_s,
.instantiate = rxrpc_instantiate_s,
.match = user_match,
.destroy = rxrpc_destroy_s,
@@ -59,6 +61,23 @@ struct key_type key_type_rxrpc_s = {
};
/*
+ * Vet the description for an RxRPC server key
+ */
+static int rxrpc_vet_description_s(const char *desc)
+{
+ unsigned long num;
+ char *p;
+
+ num = simple_strtoul(desc, &p, 10);
+ if (*p != ':' || num > 65535)
+ return -EINVAL;
+ num = simple_strtoul(p + 1, &p, 10);
+ if (*p || num < 1 || num > 255)
+ return -EINVAL;
+ return 0;
+}
+
+/*
* parse an RxKAD type XDR format token
* - the caller guarantees we have at least 4 words
*/
@@ -89,11 +108,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr,
return ret;
plen -= sizeof(*token);
- token = kmalloc(sizeof(*token), GFP_KERNEL);
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
if (!token)
return -ENOMEM;
- token->kad = kmalloc(plen, GFP_KERNEL);
+ token->kad = kzalloc(plen, GFP_KERNEL);
if (!token->kad) {
kfree(token);
return -ENOMEM;
@@ -731,10 +750,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
goto error;
ret = -ENOMEM;
- token = kmalloc(sizeof(*token), GFP_KERNEL);
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
if (!token)
goto error;
- token->kad = kmalloc(plen, GFP_KERNEL);
+ token->kad = kzalloc(plen, GFP_KERNEL);
if (!token->kad)
goto error_free;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598440a2..1bc698039ae2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev)
list_add(&dev->unreg_list, &single);
dev_deactivate_many(&single);
+ list_del(&single);
}
static void dev_init_scheduler_queue(struct net_device *dev,
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 2cc46f0962ca..b23428f3c0dd 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2029,11 +2029,11 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
*errp = sctp_make_op_error_fixed(asoc, chunk);
if (*errp) {
- sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
- WORD_ROUND(ntohs(param.p->length)));
- sctp_addto_chunk_fixed(*errp,
- WORD_ROUND(ntohs(param.p->length)),
- param.v);
+ if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
+ WORD_ROUND(ntohs(param.p->length))))
+ sctp_addto_chunk_fixed(*errp,
+ WORD_ROUND(ntohs(param.p->length)),
+ param.v);
} else {
/* If there is no memory for generating the ERROR
* report as specified, an ABORT will be triggered
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 243fc09b164e..3fc8624fcd17 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -252,23 +252,37 @@ static void rpc_set_active(struct rpc_task *task)
/*
* Mark an RPC call as having completed by clearing the 'active' bit
+ * and then waking up all tasks that were sleeping.
*/
-static void rpc_mark_complete_task(struct rpc_task *task)
+static int rpc_complete_task(struct rpc_task *task)
{
- smp_mb__before_clear_bit();
+ void *m = &task->tk_runstate;
+ wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
+ struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&wq->lock, flags);
clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
- smp_mb__after_clear_bit();
- wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+ ret = atomic_dec_and_test(&task->tk_count);
+ if (waitqueue_active(wq))
+ __wake_up_locked_key(wq, TASK_NORMAL, &k);
+ spin_unlock_irqrestore(&wq->lock, flags);
+ return ret;
}
/*
* Allow callers to wait for completion of an RPC call
+ *
+ * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
+ * to enforce taking of the wq->lock and hence avoid races with
+ * rpc_complete_task().
*/
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
{
if (action == NULL)
action = rpc_wait_bit_killable;
- return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+ return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
action, TASK_KILLABLE);
}
EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
@@ -857,34 +871,67 @@ static void rpc_async_release(struct work_struct *work)
rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
}
-void rpc_put_task(struct rpc_task *task)
+static void rpc_release_resources_task(struct rpc_task *task)
{
- if (!atomic_dec_and_test(&task->tk_count))
- return;
- /* Release resources */
if (task->tk_rqstp)
xprt_release(task);
if (task->tk_msg.rpc_cred)
put_rpccred(task->tk_msg.rpc_cred);
rpc_task_release_client(task);
- if (task->tk_workqueue != NULL) {
+}
+
+static void rpc_final_put_task(struct rpc_task *task,
+ struct workqueue_struct *q)
+{
+ if (q != NULL) {
INIT_WORK(&task->u.tk_work, rpc_async_release);
- queue_work(task->tk_workqueue, &task->u.tk_work);
+ queue_work(q, &task->u.tk_work);
} else
rpc_free_task(task);
}
+
+static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
+{
+ if (atomic_dec_and_test(&task->tk_count)) {
+ rpc_release_resources_task(task);
+ rpc_final_put_task(task, q);
+ }
+}
+
+void rpc_put_task(struct rpc_task *task)
+{
+ rpc_do_put_task(task, NULL);
+}
EXPORT_SYMBOL_GPL(rpc_put_task);
+void rpc_put_task_async(struct rpc_task *task)
+{
+ rpc_do_put_task(task, task->tk_workqueue);
+}
+EXPORT_SYMBOL_GPL(rpc_put_task_async);
+
static void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %5u release task\n", task->tk_pid);
BUG_ON (RPC_IS_QUEUED(task));
- /* Wake up anyone who is waiting for task completion */
- rpc_mark_complete_task(task);
+ rpc_release_resources_task(task);
- rpc_put_task(task);
+ /*
+ * Note: at this point we have been removed from rpc_clnt->cl_tasks,
+ * so it should be safe to use task->tk_count as a test for whether
+ * or not any other processes still hold references to our rpc_task.
+ */
+ if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
+ /* Wake up anyone who may be waiting for task completion */
+ if (!rpc_complete_task(task))
+ return;
+ } else {
+ if (!atomic_dec_and_test(&task->tk_count))
+ return;
+ }
+ rpc_final_put_task(task, task->tk_workqueue);
}
int rpciod_up(void)
@@ -908,7 +955,7 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9df1eadc912a..1a10dcd999ea 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1335,6 +1335,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
p, 0, length, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
put_page(p);
+ svc_rdma_put_context(ctxt, 1);
return;
}
atomic_inc(&xprt->sc_dma_used);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c431f5a57960..be96d429b475 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1631,7 +1631,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
}
xs_reclassify_socket(family, sock);
- if (xs_bind(transport, sock)) {
+ err = xs_bind(transport, sock);
+ if (err) {
sock_release(sock);
goto out;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index dd419d286204..ba5b8c208498 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -850,7 +850,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* Get the parent directory, calculate the hash for last
* component.
*/
- err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
+ err = kern_path_parent(sunaddr->sun_path, &nd);
if (err)
goto out_mknod_parent;
@@ -1724,7 +1724,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_namelen = 0;
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err) {
+ err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
+ goto out;
+ }
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb) {
@@ -1864,7 +1868,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err) {
+ err = sock_intr_errno(timeo);
+ goto out;
+ }
do {
int chunk;
@@ -1895,11 +1903,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
timeo = unix_stream_data_wait(sk, timeo);
- if (signal_pending(current)) {
+ if (signal_pending(current)
+ || mutex_lock_interruptible(&u->readlock)) {
err = sock_intr_errno(timeo);
goto out;
}
- mutex_lock(&u->readlock);
+
continue;
unlock:
unix_state_unlock(sk);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f89f83bf828e..b6f4b994eb35 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -104,7 +104,7 @@ struct sock *unix_get_socket(struct file *filp)
/*
* Socket ?
*/
- if (S_ISSOCK(inode->i_mode)) {
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
struct socket *sock = SOCKET_I(inode);
struct sock *s = sock->sk;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 3e5dbd4e4cd5..d112f038edf0 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
return freq;
if (freq == 0)
return -EINVAL;
- wdev_lock(wdev);
mutex_lock(&rdev->devlist_mtx);
+ wdev_lock(wdev);
err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
- mutex_unlock(&rdev->devlist_mtx);
wdev_unlock(wdev);
+ mutex_unlock(&rdev->devlist_mtx);
return err;
default:
return -EOPNOTSUPP;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8b3ef404c794..6459588befc3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1340,10 +1340,13 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
default:
BUG();
}
- xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
+ xdst = dst_alloc(dst_ops);
xfrm_policy_put_afinfo(afinfo);
- xdst->flo.ops = &xfrm_bundle_fc_ops;
+ if (likely(xdst))
+ xdst->flo.ops = &xfrm_bundle_fc_ops;
+ else
+ xdst = ERR_PTR(-ENOBUFS);
return xdst;
}
diff --git a/scripts/basic/fixdep.c b/scripts/basic/fixdep.c
index c9a16abacab4..291228e25984 100644
--- a/scripts/basic/fixdep.c
+++ b/scripts/basic/fixdep.c
@@ -309,12 +309,18 @@ static void do_config_file(const char *filename)
close(fd);
}
+/*
+ * Important: The below generated source_foo.o and deps_foo.o variable
+ * assignments are parsed not only by make, but also by the rather simple
+ * parser in scripts/mod/sumversion.c.
+ */
static void parse_dep_file(void *map, size_t len)
{
char *m = map;
char *end = m + len;
char *p;
char s[PATH_MAX];
+ int first;
p = strchr(m, ':');
if (!p) {
@@ -322,11 +328,11 @@ static void parse_dep_file(void *map, size_t len)
exit(1);
}
memcpy(s, m, p-m); s[p-m] = 0;
- printf("deps_%s := \\\n", target);
m = p+1;
clear_config();
+ first = 1;
while (m < end) {
while (m < end && (*m == ' ' || *m == '\\' || *m == '\n'))
m++;
@@ -340,9 +346,20 @@ static void parse_dep_file(void *map, size_t len)
if (strrcmp(s, "include/generated/autoconf.h") &&
strrcmp(s, "arch/um/include/uml-config.h") &&
strrcmp(s, ".ver")) {
- printf(" %s \\\n", s);
+ /*
+ * Do not list the source file as dependency, so that
+ * kbuild is not confused if a .c file is rewritten
+ * into .S or vice versa. Storing it in source_* is
+ * needed for modpost to compute srcversions.
+ */
+ if (first) {
+ printf("source_%s := %s\n\n", target, s);
+ printf("deps_%s := \\\n", target);
+ } else
+ printf(" %s \\\n", s);
do_config_file(s);
}
+ first = 0;
m = p + 1;
}
printf("\n%s: $(deps_%s)\n\n", target, target);
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
}
-# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
- if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
- ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
- }
-
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
CHK("if this code is redundant consider removing it\n" .
diff --git a/scripts/kconfig/streamline_config.pl b/scripts/kconfig/streamline_config.pl
index fd81fc33d633..a4fe923c0131 100644
--- a/scripts/kconfig/streamline_config.pl
+++ b/scripts/kconfig/streamline_config.pl
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
#
-# Copywrite 2005-2009 - Steven Rostedt
+# Copyright 2005-2009 - Steven Rostedt
# Licensed under the terms of the GNU GPL License version 2
#
# It's simple enough to figure out how this works.
diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index ecf9c7dc1825..9dfcd6d988da 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -300,8 +300,8 @@ static int is_static_library(const char *objfile)
return 0;
}
-/* We have dir/file.o. Open dir/.file.o.cmd, look for deps_ line to
- * figure out source file. */
+/* We have dir/file.o. Open dir/.file.o.cmd, look for source_ and deps_ line
+ * to figure out source files. */
static int parse_source_files(const char *objfile, struct md4_ctx *md)
{
char *cmd, *file, *line, *dir;
@@ -340,6 +340,21 @@ static int parse_source_files(const char *objfile, struct md4_ctx *md)
*/
while ((line = get_next_line(&pos, file, flen)) != NULL) {
char* p = line;
+
+ if (strncmp(line, "source_", sizeof("source_")-1) == 0) {
+ p = strrchr(line, ' ');
+ if (!p) {
+ warn("malformed line: %s\n", line);
+ goto out_file;
+ }
+ p++;
+ if (!parse_file(p, md)) {
+ warn("could not open %s: %s\n",
+ p, strerror(errno));
+ goto out_file;
+ }
+ continue;
+ }
if (strncmp(line, "deps_", sizeof("deps_")-1) == 0) {
check_files = 1;
continue;
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 038b3d1e2981..f9f6f52db772 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -206,7 +206,8 @@ static uint32_t (*w2)(uint16_t);
static int
is_mcounted_section_name(char const *const txtname)
{
- return 0 == strcmp(".text", txtname) ||
+ return 0 == strcmp(".text", txtname) ||
+ 0 == strcmp(".ref.text", txtname) ||
0 == strcmp(".sched.text", txtname) ||
0 == strcmp(".spinlock.text", txtname) ||
0 == strcmp(".irqentry.text", txtname) ||
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 1d7963f4ee79..4be0deea71ca 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -130,6 +130,7 @@ if ($inputfile =~ m,kernel/trace/ftrace\.o$,) {
# Acceptable sections to record.
my %text_sections = (
".text" => 1,
+ ".ref.text" => 1,
".sched.text" => 1,
".spinlock.text" => 1,
".irqentry.text" => 1,
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
index 44423b4dcb82..8c81d76959ee 100644
--- a/scripts/rt-tester/rt-tester.py
+++ b/scripts/rt-tester/rt-tester.py
@@ -33,8 +33,6 @@ cmd_opcodes = {
"lockintnowait" : "6",
"lockcont" : "7",
"unlock" : "8",
- "lockbkl" : "9",
- "unlockbkl" : "10",
"signal" : "11",
"resetevent" : "98",
"reset" : "99",
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
index 8821f27cc8be..3710c8b2090d 100644
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
index cde1f189a02b..b4cc95975adb 100644
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ b/scripts/rt-tester/t2-l1-pi.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
index 3ab0bfc49950..1b57376cc1f7 100644
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ b/scripts/rt-tester/t2-l1-signal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
index f4b5d5d6215f..68b10629b6f4 100644
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal 0
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
index 63440ca2cce9..8e6c8b11ae56 100644
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-1rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
index e5816fe67df3..69c2212fc520 100644
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-2rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
index 718b82b5d3bb..9b0f1eb26a88 100644
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ b/scripts/rt-tester/t3-l1-pi-3rt.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
index c6e213563498..39ec74ab06ee 100644
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ b/scripts/rt-tester/t3-l1-pi-signal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
index f53749d59d79..e03db7e010fa 100644
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ b/scripts/rt-tester/t3-l1-pi-steal.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
index cdc3e4fd7bac..7b59100d3e48 100644
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ b/scripts/rt-tester/t3-l2-pi.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
index baa14137f473..2f0e049d6443 100644
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ b/scripts/rt-tester/t4-l2-pi-deboost.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
index e6ec0c81b54d..04f4034ff895 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
index ca64f8bbf4bc..a48a6ee29ddc 100644
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
@@ -19,8 +19,6 @@
# lockintnowait lock nr (0-7)
# lockcont lock nr (0-7)
# unlock lock nr (0-7)
-# lockbkl lock nr (0-7)
-# unlockbkl lock nr (0-7)
# signal thread to signal (0-7)
# reset 0
# resetevent 0
@@ -39,9 +37,6 @@
# blocked lock nr (0-7)
# blockedwake lock nr (0-7)
# unlocked lock nr (0-7)
-# lockedbkl dont care
-# blockedbkl dont care
-# unlockedbkl dont care
# opcodeeq command opcode or number
# opcodelt number
# opcodegt number
diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c
index 58a12c278706..539855ff31f9 100644
--- a/scripts/selinux/genheaders/genheaders.c
+++ b/scripts/selinux/genheaders/genheaders.c
@@ -43,6 +43,8 @@ int main(int argc, char *argv[])
int i, j, k;
int isids_len;
FILE *fout;
+ const char *needle = "SOCKET";
+ char *substr;
progname = argv[0];
@@ -88,6 +90,24 @@ int main(int argc, char *argv[])
fprintf(fout, "%2d\n", i);
}
fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1);
+ fprintf(fout, "\nstatic inline bool security_is_socket_class(u16 kern_tclass)\n");
+ fprintf(fout, "{\n");
+ fprintf(fout, "\tbool sock = false;\n\n");
+ fprintf(fout, "\tswitch (kern_tclass) {\n");
+ for (i = 0; secclass_map[i].name; i++) {
+ struct security_class_mapping *map = &secclass_map[i];
+ substr = strstr(map->name, needle);
+ if (substr && strcmp(substr, needle) == 0)
+ fprintf(fout, "\tcase SECCLASS_%s:\n", map->name);
+ }
+ fprintf(fout, "\t\tsock = true;\n");
+ fprintf(fout, "\t\tbreak;\n");
+ fprintf(fout, "\tdefault:\n");
+ fprintf(fout, "\t\tbreak;\n");
+ fprintf(fout, "\t}\n\n");
+ fprintf(fout, "\treturn sock;\n");
+ fprintf(fout, "}\n");
+
fprintf(fout, "\n#endif\n");
fclose(fout);
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index f204869399ea..2dafe50a2e25 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -6,19 +6,47 @@ apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
resource.o sid.o file.o
-clean-files: capability_names.h af_names.h
+clean-files := capability_names.h rlim_names.h
+
+# Build a lower case string table of capability names
+# Transforms lines from
+# #define CAP_DAC_OVERRIDE 1
+# to
+# [1] = "dac_override",
quiet_cmd_make-caps = GEN $@
-cmd_make-caps = echo "static const char *capability_names[] = {" > $@ ; sed -n -e "/CAP_FS_MASK/d" -e "s/^\#define[ \\t]\\+CAP_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z >> $@ ; echo "};" >> $@
+cmd_make-caps = echo "static const char *capability_names[] = {" > $@ ;\
+ sed $< >>$@ -r -n -e '/CAP_FS_MASK/d' \
+ -e 's/^\#define[ \t]+CAP_([A-Z0-9_]+)[ \t]+([0-9]+)/[\2] = "\L\1",/p';\
+ echo "};" >> $@
+
+# Build a lower case string table of rlimit names.
+# Transforms lines from
+# #define RLIMIT_STACK 3 /* max stack size */
+# to
+# [RLIMIT_STACK] = "stack",
+#
+# and build a second integer table (with the second sed cmd), that maps
+# RLIMIT defines to the order defined in asm-generic/resource.h. This is
+# required by policy load to map policy ordering of RLIMITs to internal
+# ordering for architectures that redefine an RLIMIT.
+# Transforms lines from
+# #define RLIMIT_STACK 3 /* max stack size */
+# to
+# RLIMIT_STACK,
quiet_cmd_make-rlim = GEN $@
-cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ; sed -n --e "/AF_MAX/d" -e "s/^\# \\?define[ \\t]\\+RLIMIT_\\([A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/[\\2] = \"\\1\",/p" $< | tr A-Z a-z >> $@ ; echo "};" >> $@ ; echo "static const int rlim_map[] = {" >> $@ ; sed -n -e "/AF_MAX/d" -e "s/^\# \\?define[ \\t]\\+\\(RLIMIT_[A-Z0-9_]\\+\\)[ \\t]\\+\\([0-9]\\+\\)\\(.*\\)\$$/\\1,/p" $< >> $@ ; echo "};" >> $@
+cmd_make-rlim = echo "static const char *rlim_names[] = {" > $@ ;\
+ sed $< >> $@ -r -n \
+ -e 's/^\# ?define[ \t]+(RLIMIT_([A-Z0-9_]+)).*/[\1] = "\L\2",/p';\
+ echo "};" >> $@ ;\
+ echo "static const int rlim_map[] = {" >> $@ ;\
+ sed -r -n "s/^\# ?define[ \t]+(RLIMIT_[A-Z0-9_]+).*/\1,/p" $< >> $@ ;\
+ echo "};" >> $@
$(obj)/capability.o : $(obj)/capability_names.h
$(obj)/resource.o : $(obj)/rlim_names.h
$(obj)/capability_names.h : $(srctree)/include/linux/capability.h
$(call cmd,make-caps)
-$(obj)/af_names.h : $(srctree)/include/linux/socket.h
- $(call cmd,make-af)
$(obj)/rlim_names.h : $(srctree)/include/asm-generic/resource.h
$(call cmd,make-rlim)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index b7106f192b75..d21a427a35ae 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -693,11 +693,9 @@ static struct kernel_param_ops param_ops_aalockpolicy = {
static int param_set_audit(const char *val, struct kernel_param *kp);
static int param_get_audit(char *buffer, struct kernel_param *kp);
-#define param_check_audit(name, p) __param_check(name, p, int)
static int param_set_mode(const char *val, struct kernel_param *kp);
static int param_get_mode(char *buffer, struct kernel_param *kp);
-#define param_check_mode(name, p) __param_check(name, p, int)
/* Flag values, also controllable via /sys/module/apparmor/parameters
* We define special types as we want to do additional mediation.
diff --git a/security/capability.c b/security/capability.c
index 2a5df2b7da83..ab3d807accc3 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -12,11 +12,6 @@
#include <linux/security.h>
-static int cap_sysctl(ctl_table *table, int op)
-{
- return 0;
-}
-
static int cap_syslog(int type)
{
return 0;
@@ -59,6 +54,11 @@ static int cap_sb_copy_data(char *orig, char *copy)
return 0;
}
+static int cap_sb_remount(struct super_block *sb, void *data)
+{
+ return 0;
+}
+
static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
return 0;
@@ -118,7 +118,8 @@ static void cap_inode_free_security(struct inode *inode)
}
static int cap_inode_init_security(struct inode *inode, struct inode *dir,
- char **name, void **value, size_t *len)
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len)
{
return -EOPNOTSUPP;
}
@@ -880,7 +881,6 @@ void __init security_fixup_ops(struct security_operations *ops)
set_to_cap_if_null(ops, capable);
set_to_cap_if_null(ops, quotactl);
set_to_cap_if_null(ops, quota_on);
- set_to_cap_if_null(ops, sysctl);
set_to_cap_if_null(ops, syslog);
set_to_cap_if_null(ops, settime);
set_to_cap_if_null(ops, vm_enough_memory);
@@ -892,6 +892,7 @@ void __init security_fixup_ops(struct security_operations *ops)
set_to_cap_if_null(ops, sb_alloc_security);
set_to_cap_if_null(ops, sb_free_security);
set_to_cap_if_null(ops, sb_copy_data);
+ set_to_cap_if_null(ops, sb_remount);
set_to_cap_if_null(ops, sb_kern_mount);
set_to_cap_if_null(ops, sb_show_options);
set_to_cap_if_null(ops, sb_statfs);
diff --git a/security/commoncap.c b/security/commoncap.c
index 64c2ed9c9015..dbfdaed4cc66 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
* Determine whether the current process may set the system clock and timezone
* information, returning 0 if permission granted, -ve if denied.
*/
-int cap_settime(struct timespec *ts, struct timezone *tz)
+int cap_settime(const struct timespec *ts, const struct timezone *tz)
{
if (!capable(CAP_SYS_TIME))
return -EPERM;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index ac79032bdf23..08408bd71462 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -110,8 +110,7 @@ struct ima_iint_cache {
};
/* LIM API function definitions */
-int ima_must_measure(struct ima_iint_cache *iint, struct inode *inode,
- int mask, int function);
+int ima_must_measure(struct inode *inode, int mask, int function);
int ima_collect_measurement(struct ima_iint_cache *iint, struct file *file);
void ima_store_measurement(struct ima_iint_cache *iint, struct file *file,
const unsigned char *filename);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index d3963de6003d..da36d2c085a4 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -105,20 +105,13 @@ err_out:
* mask: contains the permission mask
* fsmagic: hex value
*
- * Must be called with iint->mutex held.
- *
- * Return 0 to measure. Return 1 if already measured.
- * For matching a DONT_MEASURE policy, no policy, or other
- * error, return an error code.
+ * Return 0 to measure. For matching a DONT_MEASURE policy, no policy,
+ * or other error, return an error code.
*/
-int ima_must_measure(struct ima_iint_cache *iint, struct inode *inode,
- int mask, int function)
+int ima_must_measure(struct inode *inode, int mask, int function)
{
int must_measure;
- if (iint && iint->flags & IMA_MEASURED)
- return 1;
-
must_measure = ima_match_policy(inode, function, mask);
return must_measure ? 0 : -EACCES;
}
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index c442e47b6785..4ae73040ab7b 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -137,11 +137,6 @@ void ima_inode_free(struct inode *inode)
{
struct ima_iint_cache *iint;
- if (inode->i_readcount)
- printk(KERN_INFO "%s: readcount: %u\n", __func__, inode->i_readcount);
-
- inode->i_readcount = 0;
-
if (!IS_IMA(inode))
return;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 203de979d305..39d66dc2b8e9 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -36,67 +36,17 @@ static int __init hash_setup(char *str)
}
__setup("ima_hash=", hash_setup);
-struct ima_imbalance {
- struct hlist_node node;
- unsigned long fsmagic;
-};
-
-/*
- * ima_limit_imbalance - emit one imbalance message per filesystem type
- *
- * Maintain list of filesystem types that do not measure files properly.
- * Return false if unknown, true if known.
- */
-static bool ima_limit_imbalance(struct file *file)
-{
- static DEFINE_SPINLOCK(ima_imbalance_lock);
- static HLIST_HEAD(ima_imbalance_list);
-
- struct super_block *sb = file->f_dentry->d_sb;
- struct ima_imbalance *entry;
- struct hlist_node *node;
- bool found = false;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(entry, node, &ima_imbalance_list, node) {
- if (entry->fsmagic == sb->s_magic) {
- found = true;
- break;
- }
- }
- rcu_read_unlock();
- if (found)
- goto out;
-
- entry = kmalloc(sizeof(*entry), GFP_NOFS);
- if (!entry)
- goto out;
- entry->fsmagic = sb->s_magic;
- spin_lock(&ima_imbalance_lock);
- /*
- * we could have raced and something else might have added this fs
- * to the list, but we don't really care
- */
- hlist_add_head_rcu(&entry->node, &ima_imbalance_list);
- spin_unlock(&ima_imbalance_lock);
- printk(KERN_INFO "IMA: unmeasured files on fsmagic: %lX\n",
- entry->fsmagic);
-out:
- return found;
-}
-
/*
- * ima_counts_get - increment file counts
+ * ima_rdwr_violation_check
*
- * Maintain read/write counters for all files, but only
- * invalidate the PCR for measured files:
+ * Only invalidate the PCR for measured files:
* - Opening a file for write when already open for read,
* results in a time of measure, time of use (ToMToU) error.
* - Opening a file for read when already open for write,
* could result in a file measurement error.
*
*/
-void ima_counts_get(struct file *file)
+static void ima_rdwr_violation_check(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -104,32 +54,25 @@ void ima_counts_get(struct file *file)
int rc;
bool send_tomtou = false, send_writers = false;
- if (!S_ISREG(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) || !ima_initialized)
return;
- spin_lock(&inode->i_lock);
-
- if (!ima_initialized)
- goto out;
+ mutex_lock(&inode->i_mutex); /* file metadata: permissions, xattr */
if (mode & FMODE_WRITE) {
- if (inode->i_readcount && IS_IMA(inode))
+ if (atomic_read(&inode->i_readcount) && IS_IMA(inode))
send_tomtou = true;
goto out;
}
- rc = ima_must_measure(NULL, inode, MAY_READ, FILE_CHECK);
+ rc = ima_must_measure(inode, MAY_READ, FILE_CHECK);
if (rc < 0)
goto out;
if (atomic_read(&inode->i_writecount) > 0)
send_writers = true;
out:
- /* remember the vfs deals with i_writecount */
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
- inode->i_readcount++;
-
- spin_unlock(&inode->i_lock);
+ mutex_unlock(&inode->i_mutex);
if (send_tomtou)
ima_add_violation(inode, dentry->d_name.name, "invalid_pcr",
@@ -139,71 +82,25 @@ out:
"open_writers");
}
-/*
- * Decrement ima counts
- */
-static void ima_dec_counts(struct inode *inode, struct file *file)
-{
- mode_t mode = file->f_mode;
-
- assert_spin_locked(&inode->i_lock);
-
- if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
- if (unlikely(inode->i_readcount == 0)) {
- if (!ima_limit_imbalance(file)) {
- printk(KERN_INFO "%s: open/free imbalance (r:%u)\n",
- __func__, inode->i_readcount);
- dump_stack();
- }
- return;
- }
- inode->i_readcount--;
- }
-}
-
static void ima_check_last_writer(struct ima_iint_cache *iint,
struct inode *inode,
struct file *file)
{
mode_t mode = file->f_mode;
- BUG_ON(!mutex_is_locked(&iint->mutex));
- assert_spin_locked(&inode->i_lock);
-
+ mutex_lock(&iint->mutex);
if (mode & FMODE_WRITE &&
atomic_read(&inode->i_writecount) == 1 &&
iint->version != inode->i_version)
iint->flags &= ~IMA_MEASURED;
-}
-
-static void ima_file_free_iint(struct ima_iint_cache *iint, struct inode *inode,
- struct file *file)
-{
- mutex_lock(&iint->mutex);
- spin_lock(&inode->i_lock);
-
- ima_dec_counts(inode, file);
- ima_check_last_writer(iint, inode, file);
-
- spin_unlock(&inode->i_lock);
mutex_unlock(&iint->mutex);
}
-static void ima_file_free_noiint(struct inode *inode, struct file *file)
-{
- spin_lock(&inode->i_lock);
-
- ima_dec_counts(inode, file);
-
- spin_unlock(&inode->i_lock);
-}
-
/**
* ima_file_free - called on __fput()
* @file: pointer to file structure being freed
*
- * Flag files that changed, based on i_version;
- * and decrement the i_readcount.
+ * Flag files that changed, based on i_version
*/
void ima_file_free(struct file *file)
{
@@ -214,12 +111,10 @@ void ima_file_free(struct file *file)
return;
iint = ima_iint_find(inode);
+ if (!iint)
+ return;
- if (iint)
- ima_file_free_iint(iint, inode, file);
- else
- ima_file_free_noiint(inode, file);
-
+ ima_check_last_writer(iint, inode, file);
}
static int process_measurement(struct file *file, const unsigned char *filename,
@@ -232,7 +127,7 @@ static int process_measurement(struct file *file, const unsigned char *filename,
if (!ima_initialized || !S_ISREG(inode->i_mode))
return 0;
- rc = ima_must_measure(NULL, inode, mask, function);
+ rc = ima_must_measure(inode, mask, function);
if (rc != 0)
return rc;
retry:
@@ -246,7 +141,7 @@ retry:
mutex_lock(&iint->mutex);
- rc = ima_must_measure(iint, inode, mask, function);
+ rc = iint->flags & IMA_MEASURED ? 1 : 0;
if (rc != 0)
goto out;
@@ -317,6 +212,7 @@ int ima_file_check(struct file *file, int mask)
{
int rc;
+ ima_rdwr_violation_check(file);
rc = process_measurement(file, file->f_dentry->d_name.name,
mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
FILE_CHECK);
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 07a5f35e3970..338b510e9027 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -12,9 +12,52 @@
#include <linux/syscalls.h>
#include <linux/keyctl.h>
#include <linux/compat.h>
+#include <linux/slab.h>
#include "internal.h"
/*
+ * Instantiate a key with the specified compatibility multipart payload and
+ * link the key into the destination keyring if one is given.
+ *
+ * The caller must have the appropriate instantiation permit set for this to
+ * work (see keyctl_assume_authority). No other permissions are required.
+ *
+ * If successful, 0 will be returned.
+ */
+long compat_keyctl_instantiate_key_iov(
+ key_serial_t id,
+ const struct compat_iovec __user *_payload_iov,
+ unsigned ioc,
+ key_serial_t ringid)
+{
+ struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+ long ret;
+
+ if (_payload_iov == 0 || ioc == 0)
+ goto no_payload;
+
+ ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
+ ARRAY_SIZE(iovstack),
+ iovstack, &iov);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ goto no_payload_free;
+
+ ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
+
+ if (iov != iovstack)
+ kfree(iov);
+ return ret;
+
+no_payload_free:
+ if (iov != iovstack)
+ kfree(iov);
+no_payload:
+ return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+}
+
+/*
* The key control system call, 32-bit compatibility version for 64-bit archs
*
* This should only be called if the 64-bit arch uses weird pointers in 32-bit
@@ -85,6 +128,13 @@ asmlinkage long compat_sys_keyctl(u32 option,
case KEYCTL_SESSION_TO_PARENT:
return keyctl_session_to_parent();
+ case KEYCTL_REJECT:
+ return keyctl_reject_key(arg2, arg3, arg4, arg5);
+
+ case KEYCTL_INSTANTIATE_IOV:
+ return compat_keyctl_instantiate_key_iov(
+ arg2, compat_ptr(arg3), arg4, arg5);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c
index 9e7e4ce3fae8..69907a58a683 100644
--- a/security/keys/encrypted.c
+++ b/security/keys/encrypted.c
@@ -765,8 +765,7 @@ static long encrypted_read(const struct key *key, char __user *buffer,
size_t asciiblob_len;
int ret;
- epayload = rcu_dereference_protected(key->payload.data,
- rwsem_is_locked(&((struct key *)key)->sem));
+ epayload = rcu_dereference_key(key);
/* returns the hex encoded iv, encrypted-data, and hmac as ascii */
asciiblob_len = epayload->datablob_len + ivsize + 1
diff --git a/security/keys/internal.h b/security/keys/internal.h
index a52aa7c88b41..07a025f81902 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -214,6 +214,14 @@ extern long keyctl_assume_authority(key_serial_t);
extern long keyctl_get_security(key_serial_t keyid, char __user *buffer,
size_t buflen);
extern long keyctl_session_to_parent(void);
+extern long keyctl_reject_key(key_serial_t, unsigned, unsigned, key_serial_t);
+extern long keyctl_instantiate_key_iov(key_serial_t,
+ const struct iovec __user *,
+ unsigned, key_serial_t);
+
+/*
+ * Common instantiation backend.  The iovec array itself is a kernel-resident
+ * copy (only the iov_base members point into userspace), so the array pointer
+ * must not carry a __user annotation; this matches the definition in keyctl.c.
+ */
+extern long keyctl_instantiate_key_common(key_serial_t,
+					  const struct iovec *,
+					  unsigned, size_t, key_serial_t);
/*
* Debugging key validation
diff --git a/security/keys/key.c b/security/keys/key.c
index 1c2d43dc5107..f7f9d93f08d9 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -249,6 +249,14 @@ struct key *key_alloc(struct key_type *type, const char *desc,
if (!desc || !*desc)
goto error;
+ if (type->vet_description) {
+ ret = type->vet_description(desc);
+ if (ret < 0) {
+ key = ERR_PTR(ret);
+ goto error;
+ }
+ }
+
desclen = strlen(desc) + 1;
quotalen = desclen + type->def_datalen;
@@ -503,26 +511,29 @@ int key_instantiate_and_link(struct key *key,
EXPORT_SYMBOL(key_instantiate_and_link);
/**
- * key_negate_and_link - Negatively instantiate a key and link it into the keyring.
+ * key_reject_and_link - Negatively instantiate a key and link it into the keyring.
* @key: The key to instantiate.
* @timeout: The timeout on the negative key.
+ * @error: The error to return when the key is hit.
* @keyring: Keyring to create a link in on success (or NULL).
* @authkey: The authorisation token permitting instantiation.
*
* Negatively instantiate a key that's in the uninstantiated state and, if
- * successful, set its timeout and link it in to the destination keyring if one
- * is supplied. The key and any links to the key will be automatically garbage
- * collected after the timeout expires.
+ * successful, set its timeout and stored error and link it in to the
+ * destination keyring if one is supplied. The key and any links to the key
+ * will be automatically garbage collected after the timeout expires.
*
* Negative keys are used to rate limit repeated request_key() calls by causing
- * them to return -ENOKEY until the negative key expires.
+ * them to return the stored error code (typically ENOKEY) until the negative
+ * key expires.
*
* If successful, 0 is returned, the authorisation token is revoked and anyone
* waiting for the key is woken up. If the key was already instantiated,
* -EBUSY will be returned.
*/
-int key_negate_and_link(struct key *key,
+int key_reject_and_link(struct key *key,
unsigned timeout,
+ unsigned error,
struct key *keyring,
struct key *authkey)
{
@@ -548,6 +559,7 @@ int key_negate_and_link(struct key *key,
atomic_inc(&key->user->nikeys);
set_bit(KEY_FLAG_NEGATIVE, &key->flags);
set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
+ key->type_data.reject_error = -error;
now = current_kernel_time();
key->expiry = now.tv_sec + timeout;
key_schedule_gc(key->expiry + key_gc_delay);
@@ -577,8 +589,7 @@ int key_negate_and_link(struct key *key,
return ret == 0 ? link_ret : ret;
}
-
-EXPORT_SYMBOL(key_negate_and_link);
+EXPORT_SYMBOL(key_reject_and_link);
/*
* Garbage collect keys in process context so that we don't have to disable
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 31a0fd8189f1..427fddcaeb19 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -913,6 +913,21 @@ static int keyctl_change_reqkey_auth(struct key *key)
}
/*
+ * Copy the iovec data from userspace
+ */
+static long copy_from_user_iovec(void *buffer, const struct iovec *iov,
+				 unsigned ioc)
+{
+	const struct iovec *const end = iov + ioc;
+
+	/* Gather each user segment in turn, packing them back to back into
+	 * the kernel buffer; bail out on the first segment that faults.
+	 */
+	while (iov != end) {
+		if (copy_from_user(buffer, iov->iov_base, iov->iov_len))
+			return -EFAULT;
+		buffer += iov->iov_len;
+		iov++;
+	}
+
+	return 0;
+}
+
+/*
* Instantiate a key with the specified payload and link the key into the
* destination keyring if one is given.
*
@@ -921,10 +936,11 @@ static int keyctl_change_reqkey_auth(struct key *key)
*
* If successful, 0 will be returned.
*/
-long keyctl_instantiate_key(key_serial_t id,
- const void __user *_payload,
- size_t plen,
- key_serial_t ringid)
+long keyctl_instantiate_key_common(key_serial_t id,
+ const struct iovec *payload_iov,
+ unsigned ioc,
+ size_t plen,
+ key_serial_t ringid)
{
const struct cred *cred = current_cred();
struct request_key_auth *rka;
@@ -953,7 +969,7 @@ long keyctl_instantiate_key(key_serial_t id,
/* pull the payload in if one was supplied */
payload = NULL;
- if (_payload) {
+ if (payload_iov) {
ret = -ENOMEM;
payload = kmalloc(plen, GFP_KERNEL);
if (!payload) {
@@ -965,8 +981,8 @@ long keyctl_instantiate_key(key_serial_t id,
goto error;
}
- ret = -EFAULT;
- if (copy_from_user(payload, _payload, plen) != 0)
+ ret = copy_from_user_iovec(payload, payload_iov, ioc);
+ if (ret < 0)
goto error2;
}
@@ -997,6 +1013,72 @@ error:
}
/*
+ * Instantiate a key with the specified payload and link the key into the
+ * destination keyring if one is given.
+ *
+ * The caller must have the appropriate instantiation permit set for this to
+ * work (see keyctl_assume_authority). No other permissions are required.
+ *
+ * If successful, 0 will be returned.
+ */
+long keyctl_instantiate_key(key_serial_t id,
+			    const void __user *_payload,
+			    size_t plen,
+			    key_serial_t ringid)
+{
+	struct iovec iov;
+
+	/* a missing or zero-length buffer is treated as "no payload" */
+	if (!_payload || !plen)
+		return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+
+	/* wrap the flat buffer in a one-element vector for the common path */
+	iov.iov_base = (void __user *)_payload;
+	iov.iov_len = plen;
+
+	return keyctl_instantiate_key_common(id, &iov, 1, plen, ringid);
+}
+
+/*
+ * Instantiate a key with the specified multipart payload and link the key into
+ * the destination keyring if one is given.
+ *
+ * The caller must have the appropriate instantiation permit set for this to
+ * work (see keyctl_assume_authority). No other permissions are required.
+ *
+ * If successful, 0 will be returned.
+ */
+long keyctl_instantiate_key_iov(key_serial_t id,
+				const struct iovec __user *_payload_iov,
+				unsigned ioc,
+				key_serial_t ringid)
+{
+	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
+	long ret;
+
+	/* no vector or an empty vector means "instantiate with no payload" */
+	if (!_payload_iov || !ioc)
+		goto no_payload;
+
+	/* Copy in and validate the iovec array.  A non-negative result is
+	 * handed on below as the payload length.  The helper may replace iov
+	 * with a heap-allocated array, which is why every exit path after this
+	 * call has to check iov against iovstack and free it.
+	 */
+	ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
+				    ARRAY_SIZE(iovstack), iovstack, &iov);
+	if (ret < 0)
+		goto err;	/* don't leak a heap-allocated iov on failure */
+	if (ret == 0)
+		goto no_payload_free;
+
+	ret = keyctl_instantiate_key_common(id, iov, ioc, ret, ringid);
+err:
+	if (iov != iovstack)
+		kfree(iov);
+	return ret;
+
+no_payload_free:
+	/* zero-length payload: drop the vector and instantiate without data */
+	if (iov != iovstack)
+		kfree(iov);
+no_payload:
+	return keyctl_instantiate_key_common(id, NULL, 0, 0, ringid);
+}
+
+/*
* Negatively instantiate the key with the given timeout (in seconds) and link
* the key into the destination keyring if one is given.
*
@@ -1013,12 +1095,42 @@ error:
*/
long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
{
+ return keyctl_reject_key(id, timeout, ENOKEY, ringid);
+}
+
+/*
+ * Negatively instantiate the key with the given timeout (in seconds) and error
+ * code and link the key into the destination keyring if one is given.
+ *
+ * The caller must have the appropriate instantiation permit set for this to
+ * work (see keyctl_assume_authority). No other permissions are required.
+ *
+ * The key and any links to the key will be automatically garbage collected
+ * after the timeout expires.
+ *
+ * Negative keys are used to rate limit repeated request_key() calls by causing
+ * them to return the specified error code until the negative key expires.
+ *
+ * If successful, 0 will be returned.
+ */
+long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error,
+ key_serial_t ringid)
+{
const struct cred *cred = current_cred();
struct request_key_auth *rka;
struct key *instkey, *dest_keyring;
long ret;
- kenter("%d,%u,%d", id, timeout, ringid);
+ kenter("%d,%u,%u,%d", id, timeout, error, ringid);
+
+ /* must be a valid error code and mustn't be a kernel special */
+ if (error <= 0 ||
+ error >= MAX_ERRNO ||
+ error == ERESTARTSYS ||
+ error == ERESTARTNOINTR ||
+ error == ERESTARTNOHAND ||
+ error == ERESTART_RESTARTBLOCK)
+ return -EINVAL;
/* the appropriate instantiation authorisation key must have been
* assumed before calling this */
@@ -1038,7 +1150,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
goto error;
/* instantiate the key and link it into a keyring */
- ret = key_negate_and_link(rka->target_key, timeout,
+ ret = key_reject_and_link(rka->target_key, timeout, error,
dest_keyring, instkey);
key_put(dest_keyring);
@@ -1492,6 +1604,19 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
case KEYCTL_SESSION_TO_PARENT:
return keyctl_session_to_parent();
+ case KEYCTL_REJECT:
+ return keyctl_reject_key((key_serial_t) arg2,
+ (unsigned) arg3,
+ (unsigned) arg4,
+ (key_serial_t) arg5);
+
+ case KEYCTL_INSTANTIATE_IOV:
+ return keyctl_instantiate_key_iov(
+ (key_serial_t) arg2,
+ (const struct iovec __user *) arg3,
+ (unsigned) arg4,
+ (key_serial_t) arg5);
+
default:
return -EOPNOTSUPP;
}
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 5620f084dede..cdd2f3f88c88 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -352,7 +352,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref,
goto error_2;
if (key->expiry && now.tv_sec >= key->expiry)
goto error_2;
- key_ref = ERR_PTR(-ENOKEY);
+ key_ref = ERR_PTR(key->type_data.reject_error);
if (kflags & (1 << KEY_FLAG_NEGATIVE))
goto error_2;
goto found;
@@ -401,7 +401,7 @@ descend:
/* we set a different error code if we pass a negative key */
if (kflags & (1 << KEY_FLAG_NEGATIVE)) {
- err = -ENOKEY;
+ err = key->type_data.reject_error;
continue;
}
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index a3dc0d460def..df3c0417ee40 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -585,7 +585,7 @@ int wait_for_key_construction(struct key *key, bool intr)
if (ret < 0)
return ret;
if (test_bit(KEY_FLAG_NEGATIVE, &key->flags))
- return -ENOKEY;
+ return key->type_data.reject_error;
return key_validate(key);
}
EXPORT_SYMBOL(wait_for_key_construction);
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 83fc92e297cd..c99b9368368c 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1076,8 +1076,7 @@ static long trusted_read(const struct key *key, char __user *buffer,
char *bufp;
int i;
- p = rcu_dereference_protected(key->payload.data,
- rwsem_is_locked(&((struct key *)key)->sem));
+ p = rcu_dereference_key(key);
if (!p)
return -EINVAL;
if (!buffer || buflen <= 0)
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index 02807fb16340..c6ca8662a468 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -184,8 +184,7 @@ long user_read(const struct key *key, char __user *buffer, size_t buflen)
struct user_key_payload *upayload;
long ret;
- upayload = rcu_dereference_protected(
- key->payload.data, rwsem_is_locked(&((struct key *)key)->sem));
+ upayload = rcu_dereference_key(key);
ret = upayload->datalen;
/* we can return the data as is */
diff --git a/security/security.c b/security/security.c
index 7b7308ace8c5..bab9b23c3ff4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -181,11 +181,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
return ret;
}
-int security_sysctl(struct ctl_table *table, int op)
-{
- return security_ops->sysctl(table, op);
-}
-
int security_quotactl(int cmds, int type, int id, struct super_block *sb)
{
return security_ops->quotactl(cmds, type, id, sb);
@@ -201,7 +196,7 @@ int security_syslog(int type)
return security_ops->syslog(type);
}
-int security_settime(struct timespec *ts, struct timezone *tz)
+int security_settime(const struct timespec *ts, const struct timezone *tz)
{
return security_ops->settime(ts, tz);
}
@@ -271,6 +266,11 @@ int security_sb_copy_data(char *orig, char *copy)
}
EXPORT_SYMBOL(security_sb_copy_data);
+/*
+ * Forward a superblock remount to the registered security_ops->sb_remount
+ * hook and hand back whatever that hook returns.
+ */
+int security_sb_remount(struct super_block *sb, void *data)
+{
+	int rc;
+
+	rc = security_ops->sb_remount(sb, data);
+	return rc;
+}
+
int security_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
return security_ops->sb_kern_mount(sb, flags, data);
@@ -335,11 +335,13 @@ void security_inode_free(struct inode *inode)
}
int security_inode_init_security(struct inode *inode, struct inode *dir,
- char **name, void **value, size_t *len)
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len)
{
if (unlikely(IS_PRIVATE(inode)))
return -EOPNOTSUPP;
- return security_ops->inode_init_security(inode, dir, name, value, len);
+ return security_ops->inode_init_security(inode, dir, qstr, name, value,
+ len);
}
EXPORT_SYMBOL(security_inode_init_security);
@@ -359,6 +361,7 @@ int security_path_mkdir(struct path *dir, struct dentry *dentry, int mode)
return 0;
return security_ops->path_mkdir(dir, dentry, mode);
}
+EXPORT_SYMBOL(security_path_mkdir);
int security_path_rmdir(struct path *dir, struct dentry *dentry)
{
@@ -373,6 +376,7 @@ int security_path_unlink(struct path *dir, struct dentry *dentry)
return 0;
return security_ops->path_unlink(dir, dentry);
}
+EXPORT_SYMBOL(security_path_unlink);
int security_path_symlink(struct path *dir, struct dentry *dentry,
const char *old_name)
@@ -399,6 +403,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
return security_ops->path_rename(old_dir, old_dentry, new_dir,
new_dentry);
}
+EXPORT_SYMBOL(security_path_rename);
int security_path_truncate(struct path *path)
{
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index c8d699270687..d52a92507412 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -24,9 +24,11 @@
*/
#include <linux/init.h>
+#include <linux/kd.h>
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/errno.h>
+#include <linux/ext2_fs.h>
#include <linux/sched.h>
#include <linux/security.h>
#include <linux/xattr.h>
@@ -36,14 +38,15 @@
#include <linux/mman.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/proc_fs.h>
#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/syscalls.h>
+#include <linux/dcache.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/namei.h>
#include <linux/mount.h>
-#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/tty.h>
@@ -70,7 +73,6 @@
#include <net/ipv6.h>
#include <linux/hugetlb.h>
#include <linux/personality.h>
-#include <linux/sysctl.h>
#include <linux/audit.h>
#include <linux/string.h>
#include <linux/selinux.h>
@@ -1120,39 +1122,35 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
}
#ifdef CONFIG_PROC_FS
-static int selinux_proc_get_sid(struct proc_dir_entry *de,
+static int selinux_proc_get_sid(struct dentry *dentry,
u16 tclass,
u32 *sid)
{
- int buflen, rc;
- char *buffer, *path, *end;
+ int rc;
+ char *buffer, *path;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer)
return -ENOMEM;
- buflen = PAGE_SIZE;
- end = buffer+buflen;
- *--end = '\0';
- buflen--;
- path = end-1;
- *path = '/';
- while (de && de != de->parent) {
- buflen -= de->namelen + 1;
- if (buflen < 0)
- break;
- end -= de->namelen;
- memcpy(end, de->name, de->namelen);
- *--end = '/';
- path = end;
- de = de->parent;
+ path = dentry_path_raw(dentry, buffer, PAGE_SIZE);
+ if (IS_ERR(path))
+ rc = PTR_ERR(path);
+ else {
+ /* each process gets a /proc/PID/ entry. Strip off the
+ * PID part to get a valid selinux labeling.
+ * e.g. /proc/1/net/rpc/nfs -> /net/rpc/nfs */
+ while (path[1] >= '0' && path[1] <= '9') {
+ path[1] = '/';
+ path++;
+ }
+ rc = security_genfs_sid("proc", path, tclass, sid);
}
- rc = security_genfs_sid("proc", path, tclass, sid);
free_page((unsigned long)buffer);
return rc;
}
#else
-static int selinux_proc_get_sid(struct proc_dir_entry *de,
+static int selinux_proc_get_sid(struct dentry *dentry,
u16 tclass,
u32 *sid)
{
@@ -1300,10 +1298,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
/* Try to obtain a transition SID. */
isec->sclass = inode_mode_to_security_class(inode->i_mode);
- rc = security_transition_sid(isec->task_sid,
- sbsec->sid,
- isec->sclass,
- &sid);
+ rc = security_transition_sid(isec->task_sid, sbsec->sid,
+ isec->sclass, NULL, &sid);
if (rc)
goto out_unlock;
isec->sid = sid;
@@ -1316,10 +1312,9 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
isec->sid = sbsec->sid;
if ((sbsec->flags & SE_SBPROC) && !S_ISLNK(inode->i_mode)) {
- struct proc_inode *proci = PROC_I(inode);
- if (proci->pde) {
+ if (opt_dentry) {
isec->sclass = inode_mode_to_security_class(inode->i_mode);
- rc = selinux_proc_get_sid(proci->pde,
+ rc = selinux_proc_get_sid(opt_dentry,
isec->sclass,
&sid);
if (rc)
@@ -1578,7 +1573,7 @@ static int may_create(struct inode *dir,
return rc;
if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
- rc = security_transition_sid(sid, dsec->sid, tclass, &newsid);
+ rc = security_transition_sid(sid, dsec->sid, tclass, NULL, &newsid);
if (rc)
return rc;
}
@@ -1862,82 +1857,6 @@ static int selinux_capable(struct task_struct *tsk, const struct cred *cred,
return task_has_capability(tsk, cred, cap, audit);
}
-static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid)
-{
- int buflen, rc;
- char *buffer, *path, *end;
-
- rc = -ENOMEM;
- buffer = (char *)__get_free_page(GFP_KERNEL);
- if (!buffer)
- goto out;
-
- buflen = PAGE_SIZE;
- end = buffer+buflen;
- *--end = '\0';
- buflen--;
- path = end-1;
- *path = '/';
- while (table) {
- const char *name = table->procname;
- size_t namelen = strlen(name);
- buflen -= namelen + 1;
- if (buflen < 0)
- goto out_free;
- end -= namelen;
- memcpy(end, name, namelen);
- *--end = '/';
- path = end;
- table = table->parent;
- }
- buflen -= 4;
- if (buflen < 0)
- goto out_free;
- end -= 4;
- memcpy(end, "/sys", 4);
- path = end;
- rc = security_genfs_sid("proc", path, tclass, sid);
-out_free:
- free_page((unsigned long)buffer);
-out:
- return rc;
-}
-
-static int selinux_sysctl(ctl_table *table, int op)
-{
- int error = 0;
- u32 av;
- u32 tsid, sid;
- int rc;
-
- sid = current_sid();
-
- rc = selinux_sysctl_get_sid(table, (op == 0001) ?
- SECCLASS_DIR : SECCLASS_FILE, &tsid);
- if (rc) {
- /* Default to the well-defined sysctl SID. */
- tsid = SECINITSID_SYSCTL;
- }
-
- /* The op values are "defined" in sysctl.c, thereby creating
- * a bad coupling between this module and sysctl.c */
- if (op == 001) {
- error = avc_has_perm(sid, tsid,
- SECCLASS_DIR, DIR__SEARCH, NULL);
- } else {
- av = 0;
- if (op & 004)
- av |= FILE__READ;
- if (op & 002)
- av |= FILE__WRITE;
- if (av)
- error = avc_has_perm(sid, tsid,
- SECCLASS_FILE, av, NULL);
- }
-
- return error;
-}
-
static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
{
const struct cred *cred = current_cred();
@@ -2060,7 +1979,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm)
} else {
/* Check for a default transition on this program. */
rc = security_transition_sid(old_tsec->sid, isec->sid,
- SECCLASS_PROCESS, &new_tsec->sid);
+ SECCLASS_PROCESS, NULL,
+ &new_tsec->sid);
if (rc)
return rc;
}
@@ -2443,6 +2363,91 @@ out:
return rc;
}
+/*
+ * Check the SELinux options given on a remount against the labels already
+ * in force on the superblock.  Each context option is converted to a SID and
+ * passed to bad_option() together with the SID currently recorded for that
+ * option; a rejected or unrecognised option fails the remount with -EINVAL.
+ * Returns 0 when there is nothing to check or every option is acceptable.
+ */
+static int selinux_sb_remount(struct super_block *sb, void *data)
+{
+	struct superblock_security_struct *sbsec = sb->s_security;
+	struct security_mnt_opts opts;
+	char **opt_strs;
+	char *secdata;
+	int *opt_flags;
+	int rc, i;
+
+	/* nothing to verify: superblock not set up yet, no option string, or
+	 * the filesystem takes binary mount data */
+	if (!(sbsec->flags & SE_SBINITIALIZED) ||
+	    !data ||
+	    (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA))
+		return 0;
+
+	security_init_mnt_opts(&opts);
+
+	secdata = alloc_secdata();
+	if (!secdata)
+		return -ENOMEM;
+
+	rc = selinux_sb_copy_data(data, secdata);
+	if (rc)
+		goto out_free_secdata;
+
+	rc = selinux_parse_opts_str(secdata, &opts);
+	if (rc)
+		goto out_free_secdata;
+
+	opt_strs = opts.mnt_opts;
+	opt_flags = opts.mnt_opts_flags;
+
+	for (i = 0; i < opts.num_mnt_opts; i++) {
+		u32 sid;
+		u32 cur_sid;
+
+		if (opt_flags[i] == SE_SBLABELSUPP)
+			continue;
+
+		rc = security_context_to_sid(opt_strs[i], strlen(opt_strs[i]),
+					     &sid);
+		if (rc) {
+			printk(KERN_WARNING "SELinux: security_context_to_sid"
+			       "(%s) failed for (dev %s, type %s) errno=%d\n",
+			       opt_strs[i], sb->s_id, sb->s_type->name, rc);
+			goto out_free_opts;
+		}
+
+		/* from here on any early exit reports an invalid option */
+		rc = -EINVAL;
+
+		/* pick the SID currently recorded for this kind of option */
+		switch (opt_flags[i]) {
+		case FSCONTEXT_MNT:
+			cur_sid = sbsec->sid;
+			break;
+		case CONTEXT_MNT:
+			cur_sid = sbsec->mntpoint_sid;
+			break;
+		case ROOTCONTEXT_MNT: {
+			struct inode_security_struct *root_isec;
+
+			root_isec = sb->s_root->d_inode->i_security;
+			cur_sid = root_isec->sid;
+			break;
+		}
+		case DEFCONTEXT_MNT:
+			cur_sid = sbsec->def_sid;
+			break;
+		default:
+			goto out_free_opts;
+		}
+
+		if (bad_option(sbsec, opt_flags[i], cur_sid, sid))
+			goto out_bad_option;
+	}
+
+	rc = 0;
+out_free_opts:
+	security_free_mnt_opts(&opts);
+out_free_secdata:
+	free_secdata(secdata);
+	return rc;
+out_bad_option:
+	printk(KERN_WARNING "SELinux: unable to change security options "
+	       "during remount (dev %s, type=%s)\n", sb->s_id,
+	       sb->s_type->name);
+	goto out_free_opts;
+}
+
static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data)
{
const struct cred *cred = current_cred();
@@ -2509,8 +2514,8 @@ static void selinux_inode_free_security(struct inode *inode)
}
static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
- char **name, void **value,
- size_t *len)
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len)
{
const struct task_security_struct *tsec = current_security();
struct inode_security_struct *dsec;
@@ -2531,7 +2536,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
else if (!newsid || !(sbsec->flags & SE_SBLABELSUPP)) {
rc = security_transition_sid(sid, dsec->sid,
inode_mode_to_security_class(inode->i_mode),
- &newsid);
+ qstr, &newsid);
if (rc) {
printk(KERN_WARNING "%s: "
"security_transition_sid failed, rc=%d (dev=%s "
@@ -2932,16 +2937,47 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
const struct cred *cred = current_cred();
- u32 av = 0;
+ int error = 0;
- if (_IOC_DIR(cmd) & _IOC_WRITE)
- av |= FILE__WRITE;
- if (_IOC_DIR(cmd) & _IOC_READ)
- av |= FILE__READ;
- if (!av)
- av = FILE__IOCTL;
+ switch (cmd) {
+ case FIONREAD:
+ /* fall through */
+ case FIBMAP:
+ /* fall through */
+ case FIGETBSZ:
+ /* fall through */
+ case EXT2_IOC_GETFLAGS:
+ /* fall through */
+ case EXT2_IOC_GETVERSION:
+ error = file_has_perm(cred, file, FILE__GETATTR);
+ break;
+
+ case EXT2_IOC_SETFLAGS:
+ /* fall through */
+ case EXT2_IOC_SETVERSION:
+ error = file_has_perm(cred, file, FILE__SETATTR);
+ break;
+
+ /* sys_ioctl() checks */
+ case FIONBIO:
+ /* fall through */
+ case FIOASYNC:
+ error = file_has_perm(cred, file, 0);
+ break;
- return file_has_perm(cred, file, av);
+ case KDSKBENT:
+ case KDSKBSENT:
+ error = task_has_capability(current, cred, CAP_SYS_TTY_CONFIG,
+ SECURITY_CAP_AUDIT);
+ break;
+
+ /* default case assumes that the command will go
+ * to the file's ioctl() function.
+ */
+ default:
+ error = file_has_perm(cred, file, FILE__IOCTL);
+ }
+ return error;
}
static int default_noexec;
@@ -3644,9 +3680,16 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid)
/* socket security operations */
-static u32 socket_sockcreate_sid(const struct task_security_struct *tsec)
+static int socket_sockcreate_sid(const struct task_security_struct *tsec,
+ u16 secclass, u32 *socksid)
{
- return tsec->sockcreate_sid ? : tsec->sid;
+ if (tsec->sockcreate_sid > SECSID_NULL) {
+ *socksid = tsec->sockcreate_sid;
+ return 0;
+ }
+
+ return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL,
+ socksid);
}
static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms)
@@ -3670,12 +3713,16 @@ static int selinux_socket_create(int family, int type,
const struct task_security_struct *tsec = current_security();
u32 newsid;
u16 secclass;
+ int rc;
if (kern)
return 0;
- newsid = socket_sockcreate_sid(tsec);
secclass = socket_type_to_security_class(family, type, protocol);
+ rc = socket_sockcreate_sid(tsec, secclass, &newsid);
+ if (rc)
+ return rc;
+
return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL);
}
@@ -3687,12 +3734,16 @@ static int selinux_socket_post_create(struct socket *sock, int family,
struct sk_security_struct *sksec;
int err = 0;
+ isec->sclass = socket_type_to_security_class(family, type, protocol);
+
if (kern)
isec->sid = SECINITSID_KERNEL;
- else
- isec->sid = socket_sockcreate_sid(tsec);
+ else {
+ err = socket_sockcreate_sid(tsec, isec->sclass, &(isec->sid));
+ if (err)
+ return err;
+ }
- isec->sclass = socket_type_to_security_class(family, type, protocol);
isec->initialized = 1;
if (sock->sk) {
@@ -4002,7 +4053,6 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
{
int err = 0;
struct sk_security_struct *sksec = sk->sk_security;
- u32 peer_sid;
u32 sk_sid = sksec->sid;
struct common_audit_data ad;
char *addrp;
@@ -4021,20 +4071,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb,
return err;
}
- if (selinux_policycap_netpeer) {
- err = selinux_skb_peerlbl_sid(skb, family, &peer_sid);
- if (err)
- return err;
- err = avc_has_perm(sk_sid, peer_sid,
- SECCLASS_PEER, PEER__RECV, &ad);
- if (err)
- selinux_netlbl_err(skb, err, 0);
- } else {
- err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
- if (err)
- return err;
- err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
- }
+ err = selinux_netlbl_sock_rcv_skb(sksec, skb, family, &ad);
+ if (err)
+ return err;
+ err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad);
return err;
}
@@ -4529,9 +4569,8 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb,
SECCLASS_PACKET, PACKET__SEND, &ad))
return NF_DROP_ERR(-ECONNREFUSED);
- if (selinux_policycap_netpeer)
- if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
- return NF_DROP_ERR(-ECONNREFUSED);
+ if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto))
+ return NF_DROP_ERR(-ECONNREFUSED);
return NF_ACCEPT;
}
@@ -4574,27 +4613,14 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
* from the sending socket, otherwise use the kernel's sid */
sk = skb->sk;
if (sk == NULL) {
- switch (family) {
- case PF_INET:
- if (IPCB(skb)->flags & IPSKB_FORWARDED)
- secmark_perm = PACKET__FORWARD_OUT;
- else
- secmark_perm = PACKET__SEND;
- break;
- case PF_INET6:
- if (IP6CB(skb)->flags & IP6SKB_FORWARDED)
- secmark_perm = PACKET__FORWARD_OUT;
- else
- secmark_perm = PACKET__SEND;
- break;
- default:
- return NF_DROP_ERR(-ECONNREFUSED);
- }
- if (secmark_perm == PACKET__FORWARD_OUT) {
+ if (skb->skb_iif) {
+ secmark_perm = PACKET__FORWARD_OUT;
if (selinux_skb_peerlbl_sid(skb, family, &peer_sid))
return NF_DROP;
- } else
+ } else {
+ secmark_perm = PACKET__SEND;
peer_sid = SECINITSID_KERNEL;
+ }
} else {
struct sk_security_struct *sksec = sk->sk_security;
peer_sid = sksec->sid;
@@ -4848,7 +4874,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg,
* message queue this message will be stored in
*/
rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG,
- &msec->sid);
+ NULL, &msec->sid);
if (rc)
return rc;
}
@@ -5402,7 +5428,6 @@ static struct security_operations selinux_ops = {
.ptrace_traceme = selinux_ptrace_traceme,
.capget = selinux_capget,
.capset = selinux_capset,
- .sysctl = selinux_sysctl,
.capable = selinux_capable,
.quotactl = selinux_quotactl,
.quota_on = selinux_quota_on,
@@ -5420,6 +5445,7 @@ static struct security_operations selinux_ops = {
.sb_alloc_security = selinux_sb_alloc_security,
.sb_free_security = selinux_sb_free_security,
.sb_copy_data = selinux_sb_copy_data,
+ .sb_remount = selinux_sb_remount,
.sb_kern_mount = selinux_sb_kern_mount,
.sb_show_options = selinux_sb_show_options,
.sb_statfs = selinux_sb_statfs,
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 7ed3663332ec..b8c53723e09b 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -12,6 +12,10 @@
#define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \
"write", "associate", "unix_read", "unix_write"
+/*
+ * Note: The name of any socket class should end with "socket" and
+ * should not contain the substring "socket" more than once.
+ */
struct security_class_mapping secclass_map[] = {
{ "security",
{ "compute_av", "compute_create", "compute_member",
@@ -132,8 +136,7 @@ struct security_class_mapping secclass_map[] = {
{ "appletalk_socket",
{ COMMON_SOCK_PERMS, NULL } },
{ "packet",
- { "send", "recv", "relabelto", "flow_in", "flow_out",
- "forward_in", "forward_out", NULL } },
+ { "send", "recv", "relabelto", "forward_in", "forward_out", NULL } },
{ "key",
{ "view", "read", "write", "search", "link", "setattr", "create",
NULL } },
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 671273eb1115..348eb00cb668 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -8,6 +8,7 @@
#ifndef _SELINUX_SECURITY_H_
#define _SELINUX_SECURITY_H_
+#include <linux/dcache.h>
#include <linux/magic.h>
#include <linux/types.h>
#include "flask.h"
@@ -28,13 +29,14 @@
#define POLICYDB_VERSION_POLCAP 22
#define POLICYDB_VERSION_PERMISSIVE 23
#define POLICYDB_VERSION_BOUNDARY 24
+#define POLICYDB_VERSION_FILENAME_TRANS 25
/* Range of policy versions we understand*/
#define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE
#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
#define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
#else
-#define POLICYDB_VERSION_MAX POLICYDB_VERSION_BOUNDARY
+#define POLICYDB_VERSION_MAX POLICYDB_VERSION_FILENAME_TRANS
#endif
/* Mask for just the mount related flags */
@@ -106,8 +108,8 @@ void security_compute_av(u32 ssid, u32 tsid,
void security_compute_av_user(u32 ssid, u32 tsid,
u16 tclass, struct av_decision *avd);
-int security_transition_sid(u32 ssid, u32 tsid,
- u16 tclass, u32 *out_sid);
+int security_transition_sid(u32 ssid, u32 tsid, u16 tclass,
+ const struct qstr *qstr, u32 *out_sid);
int security_transition_sid_user(u32 ssid, u32 tsid,
u16 tclass, u32 *out_sid);
diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h
index dff0c75345c1..63ce2f9e441d 100644
--- a/security/selinux/ss/avtab.h
+++ b/security/selinux/ss/avtab.h
@@ -14,7 +14,7 @@
*
* Copyright (C) 2003 Tresys Technology, LLC
* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
+ * it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* Updated: Yuichi Nakamura <ynakam@hitachisoft.jp>
@@ -27,16 +27,16 @@ struct avtab_key {
u16 source_type; /* source type */
u16 target_type; /* target type */
u16 target_class; /* target object class */
-#define AVTAB_ALLOWED 1
-#define AVTAB_AUDITALLOW 2
-#define AVTAB_AUDITDENY 4
-#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY)
-#define AVTAB_TRANSITION 16
-#define AVTAB_MEMBER 32
-#define AVTAB_CHANGE 64
-#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
-#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */
-#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */
+#define AVTAB_ALLOWED 0x0001
+#define AVTAB_AUDITALLOW 0x0002
+#define AVTAB_AUDITDENY 0x0004
+#define AVTAB_AV (AVTAB_ALLOWED | AVTAB_AUDITALLOW | AVTAB_AUDITDENY)
+#define AVTAB_TRANSITION 0x0010
+#define AVTAB_MEMBER 0x0020
+#define AVTAB_CHANGE 0x0040
+#define AVTAB_TYPE (AVTAB_TRANSITION | AVTAB_MEMBER | AVTAB_CHANGE)
+#define AVTAB_ENABLED_OLD 0x80000000 /* reserved for used in cond_avtab */
+#define AVTAB_ENABLED 0x8000 /* reserved for used in cond_avtab */
u16 specified; /* what field is specified */
};
@@ -86,7 +86,6 @@ void avtab_cache_destroy(void);
#define MAX_AVTAB_HASH_BITS 11
#define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS)
-#define MAX_AVTAB_HASH_MASK (MAX_AVTAB_HASH_BUCKETS-1)
#endif /* _SS_AVTAB_H_ */
diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h
index 1f4e93c2ae86..922f8afa89dd 100644
--- a/security/selinux/ss/ebitmap.h
+++ b/security/selinux/ss/ebitmap.h
@@ -36,7 +36,6 @@ struct ebitmap {
};
#define ebitmap_length(e) ((e)->highbit)
-#define ebitmap_startbit(e) ((e)->node ? (e)->node->startbit : 0)
static inline unsigned int ebitmap_start_positive(struct ebitmap *e,
struct ebitmap_node **n)
diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 1ef8e4e89880..e96174216bc9 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -512,7 +512,8 @@ int mls_compute_sid(struct context *scontext,
struct context *tcontext,
u16 tclass,
u32 specified,
- struct context *newcontext)
+ struct context *newcontext,
+ bool sock)
{
struct range_trans rtr;
struct mls_range *r;
@@ -531,7 +532,7 @@ int mls_compute_sid(struct context *scontext,
return mls_range_set(newcontext, r);
/* Fallthrough */
case AVTAB_CHANGE:
- if (tclass == policydb.process_class)
+ if ((tclass == policydb.process_class) || (sock == true))
/* Use the process MLS attributes. */
return mls_context_cpy(newcontext, scontext);
else
diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h
index cd9152632e54..037bf9d82d41 100644
--- a/security/selinux/ss/mls.h
+++ b/security/selinux/ss/mls.h
@@ -49,7 +49,8 @@ int mls_compute_sid(struct context *scontext,
struct context *tcontext,
u16 tclass,
u32 specified,
- struct context *newcontext);
+ struct context *newcontext,
+ bool sock);
int mls_setup_user_range(struct context *fromcon, struct user_datum *user,
struct context *usercon);
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 57363562f0f8..e7b850ad57ee 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -123,6 +123,11 @@ static struct policydb_compat_info policydb_compat[] = {
.sym_num = SYM_NUM,
.ocon_num = OCON_NUM,
},
+ {
+ .version = POLICYDB_VERSION_FILENAME_TRANS,
+ .sym_num = SYM_NUM,
+ .ocon_num = OCON_NUM,
+ },
};
static struct policydb_compat_info *policydb_lookup_compat(int version)
@@ -704,6 +709,7 @@ void policydb_destroy(struct policydb *p)
int i;
struct role_allow *ra, *lra = NULL;
struct role_trans *tr, *ltr = NULL;
+ struct filename_trans *ft, *nft;
for (i = 0; i < SYM_NUM; i++) {
cond_resched();
@@ -781,6 +787,15 @@ void policydb_destroy(struct policydb *p)
}
flex_array_free(p->type_attr_map_array);
}
+
+ ft = p->filename_trans;
+ while (ft) {
+ nft = ft->next;
+ kfree(ft->name);
+ kfree(ft);
+ ft = nft;
+ }
+
ebitmap_destroy(&p->policycaps);
ebitmap_destroy(&p->permissive_map);
@@ -1788,6 +1803,76 @@ out:
return rc;
}
+/*
+ * Append the filename transition rules found in policy image @fp to
+ * p->filename_trans (none exist before POLICYDB_VERSION_FILENAME_TRANS).
+ * Returns 0, -ENOMEM, -EINVAL for an impossible name length, or the
+ * next_entry() error; entries linked before a failure stay on the list.
+ */
+static int filename_trans_read(struct policydb *p, void *fp)
+{
+	struct filename_trans *ft, *last;
+	u32 nel, len, i;
+	char *name;
+	__le32 buf[4];
+	int rc;
+
+	if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS)
+		return 0;
+
+	rc = next_entry(buf, fp, sizeof(u32));
+	if (rc)
+		return rc;
+	nel = le32_to_cpu(buf[0]);
+
+	/* find the current tail so new entries keep their file order */
+	last = p->filename_trans;
+	while (last && last->next)
+		last = last->next;
+
+	for (i = 0; i < nel; i++) {
+		ft = kzalloc(sizeof(*ft), GFP_KERNEL);
+		if (!ft)
+			return -ENOMEM;
+
+		/* link it immediately so it is not lost if a later read fails */
+		if (!last)
+			p->filename_trans = ft;
+		else
+			last->next = ft;
+		last = ft;
+
+		/* length of the path component string */
+		rc = next_entry(buf, fp, sizeof(u32));
+		if (rc)
+			return rc;
+		len = le32_to_cpu(buf[0]);
+		if (len == (u32)-1)	/* len + 1 below would wrap to 0 */
+			return -EINVAL;
+
+		name = kmalloc(len + 1, GFP_KERNEL);
+		if (!name)
+			return -ENOMEM;
+		ft->name = name;
+
+		/* path component string; the image does not store a NUL */
+		rc = next_entry(name, fp, len);
+		if (rc)
+			return rc;
+		name[len] = 0;
+
+		/* source type, target type, object class, resulting type */
+		rc = next_entry(buf, fp, sizeof(u32) * 4);
+		if (rc)
+			return rc;
+		ft->stype = le32_to_cpu(buf[0]);
+		ft->ttype = le32_to_cpu(buf[1]);
+		ft->tclass = le32_to_cpu(buf[2]);
+		ft->otype = le32_to_cpu(buf[3]);
+	}
+	return 0;
+}
+
static int genfs_read(struct policydb *p, void *fp)
{
int i, j, rc;
@@ -2251,6 +2336,10 @@ int policydb_read(struct policydb *p, void *fp)
lra = ra;
}
+ rc = filename_trans_read(p, fp);
+ if (rc)
+ goto bad;
+
rc = policydb_index(p);
if (rc)
goto bad;
@@ -3025,6 +3114,43 @@ static int range_write(struct policydb *p, void *fp)
return 0;
}
+/* Mirror of filename_trans_read(): same version gate, same on-disk layout. */
+static int filename_trans_write(struct policydb *p, void *fp)
+{
+	struct filename_trans *ft;
+	u32 len, nel = 0;
+	__le32 buf[4];
+	int rc;
+
+	if (p->policyvers < POLICYDB_VERSION_FILENAME_TRANS)
+		return 0;
+
+	for (ft = p->filename_trans; ft; ft = ft->next)
+		nel++;
+	buf[0] = cpu_to_le32(nel);
+	rc = put_entry(buf, sizeof(u32), 1, fp);
+	if (rc)
+		return rc;
+
+	for (ft = p->filename_trans; ft; ft = ft->next) {
+		len = strlen(ft->name);
+		buf[0] = cpu_to_le32(len);
+		rc = put_entry(buf, sizeof(u32), 1, fp);
+		if (!rc)	/* name bytes only, no trailing NUL */
+			rc = put_entry(ft->name, sizeof(char), len, fp);
+		if (rc)
+			return rc;
+		/* convert to little-endian; the reader uses le32_to_cpu() */
+		buf[0] = cpu_to_le32(ft->stype);
+		buf[1] = cpu_to_le32(ft->ttype);
+		buf[2] = cpu_to_le32(ft->tclass);
+		buf[3] = cpu_to_le32(ft->otype);
+		rc = put_entry(buf, sizeof(u32), 4, fp);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
/*
* Write the configuration data in a policy database
* structure to a policy database binary representation
@@ -3135,6 +3261,10 @@ int policydb_write(struct policydb *p, void *fp)
if (rc)
return rc;
+ rc = filename_trans_write(p, fp);
+ if (rc)
+ return rc;
+
rc = ocontext_write(p, info, fp);
if (rc)
return rc;
diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h
index 4e3ab9d0b315..732ea4a68682 100644
--- a/security/selinux/ss/policydb.h
+++ b/security/selinux/ss/policydb.h
@@ -77,6 +77,15 @@ struct role_trans {
struct role_trans *next;
};
+struct filename_trans {
+ struct filename_trans *next; /* next rule in the list, NULL at the tail */
+ u32 stype; /* type of the current process */
+ u32 ttype; /* type of the parent directory */
+ u16 tclass; /* class of new object */
+ const char *name; /* last path component, NUL-terminated */
+ u32 otype; /* type assigned to the new object */
+};
+
struct role_allow {
u32 role; /* current role */
u32 new_role; /* new role */
@@ -217,6 +226,9 @@ struct policydb {
/* role transitions */
struct role_trans *role_tr;
+ /* file transitions with the last path component */
+ struct filename_trans *filename_trans;
+
/* bools indexed by (value - 1) */
struct cond_bool_datum **bool_val_to_struct;
/* type enforcement conditional access vectors and transitions */
@@ -302,7 +314,7 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes)
return 0;
}
-static inline int put_entry(void *buf, size_t bytes, int num, struct policy_file *fp)
+static inline int put_entry(const void *buf, size_t bytes, int num, struct policy_file *fp)
{
size_t len = bytes * num;
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index a03cfaf0ee07..3e7544d2a07b 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -201,6 +201,21 @@ static u16 unmap_class(u16 tclass)
return tclass;
}
+/*
+ * Map a policy class value to its kernel index; unmapped values pass through.
+ */
+static u16 map_class(u16 pol_value)
+{
+	u16 kclass = 1;
+
+	while (kclass < current_mapping_size) {
+		if (current_mapping[kclass].value == pol_value)
+			return kclass;
+		kclass++;
+	}
+	return pol_value;
+}
+
static void map_decision(u16 tclass, struct av_decision *avd,
int allow_unknown)
{
@@ -1343,10 +1358,27 @@ out:
return -EACCES;
}
+/* First rule matching (scon, tcon, tclass, qstr->name) sets newcontext->type. */
+static void filename_compute_type(struct policydb *p, struct context *newcontext,
+				  u32 scon, u32 tcon, u16 tclass,
+				  const struct qstr *qstr)
+{
+	struct filename_trans *rule = p->filename_trans;
+	for (; rule; rule = rule->next) {
+		if (rule->stype != scon || rule->ttype != tcon)
+			continue;
+		if (rule->tclass != tclass || strcmp(rule->name, qstr->name))
+			continue;
+		newcontext->type = rule->otype;
+		return;
+	}
+}
+
static int security_compute_sid(u32 ssid,
u32 tsid,
u16 orig_tclass,
u32 specified,
+ const struct qstr *qstr,
u32 *out_sid,
bool kern)
{
@@ -1357,6 +1389,7 @@ static int security_compute_sid(u32 ssid,
struct avtab_node *node;
u16 tclass;
int rc = 0;
+ bool sock;
if (!ss_initialized) {
switch (orig_tclass) {
@@ -1374,10 +1407,13 @@ static int security_compute_sid(u32 ssid,
read_lock(&policy_rwlock);
- if (kern)
+ if (kern) {
tclass = unmap_class(orig_tclass);
- else
+ sock = security_is_socket_class(orig_tclass);
+ } else {
tclass = orig_tclass;
+ sock = security_is_socket_class(map_class(tclass));
+ }
scontext = sidtab_search(&sidtab, ssid);
if (!scontext) {
@@ -1408,7 +1444,7 @@ static int security_compute_sid(u32 ssid,
}
/* Set the role and type to default values. */
- if (tclass == policydb.process_class) {
+ if ((tclass == policydb.process_class) || (sock == true)) {
/* Use the current role and type of process. */
newcontext.role = scontext->role;
newcontext.type = scontext->type;
@@ -1442,6 +1478,11 @@ static int security_compute_sid(u32 ssid,
newcontext.type = avdatum->data;
}
+ /* if we have a qstr this is a file trans check so check those rules */
+ if (qstr)
+ filename_compute_type(&policydb, &newcontext, scontext->type,
+ tcontext->type, tclass, qstr);
+
/* Check for class-specific changes. */
if (tclass == policydb.process_class) {
if (specified & AVTAB_TRANSITION) {
@@ -1460,7 +1501,8 @@ static int security_compute_sid(u32 ssid,
/* Set the MLS attributes.
This is done last because it may allocate memory. */
- rc = mls_compute_sid(scontext, tcontext, tclass, specified, &newcontext);
+ rc = mls_compute_sid(scontext, tcontext, tclass, specified,
+ &newcontext, sock);
if (rc)
goto out_unlock;
@@ -1495,22 +1537,17 @@ out:
* if insufficient memory is available, or %0 if the new SID was
* computed successfully.
*/
-int security_transition_sid(u32 ssid,
- u32 tsid,
- u16 tclass,
- u32 *out_sid)
+int security_transition_sid(u32 ssid, u32 tsid, u16 tclass,
+ const struct qstr *qstr, u32 *out_sid)
{
return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION,
- out_sid, true);
+ qstr, out_sid, true);
}
-int security_transition_sid_user(u32 ssid,
- u32 tsid,
- u16 tclass,
- u32 *out_sid)
+int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid)
{
return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION,
- out_sid, false);
+ NULL, out_sid, false);
}
/**
@@ -1531,8 +1568,8 @@ int security_member_sid(u32 ssid,
u16 tclass,
u32 *out_sid)
{
- return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid,
- false);
+ return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, NULL,
+ out_sid, false);
}
/**
@@ -1553,8 +1590,8 @@ int security_change_sid(u32 ssid,
u16 tclass,
u32 *out_sid)
{
- return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid,
- false);
+ return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, NULL,
+ out_sid, false);
}
/* Clone the SID into the new SID table. */
diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c
index fff78d3b51a2..728c57e3d65d 100644
--- a/security/selinux/xfrm.c
+++ b/security/selinux/xfrm.c
@@ -208,7 +208,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp,
if (!uctx)
goto not_from_user;
- if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX)
+ if (uctx->ctx_alg != XFRM_SC_ALG_SELINUX)
return -EINVAL;
str_len = uctx->ctx_len;
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 129c4eb8ffb1..b449cfdad21c 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -52,13 +52,16 @@ struct socket_smack {
struct inode_smack {
char *smk_inode; /* label of the fso */
char *smk_task; /* label of the task */
+ char *smk_mmap; /* label of the mmap domain */
struct mutex smk_lock; /* initialization lock */
int smk_flags; /* smack inode flags */
};
struct task_smack {
- char *smk_task; /* label used for access control */
- char *smk_forked; /* label when forked */
+ char *smk_task; /* label for access control */
+ char *smk_forked; /* label when forked */
+ struct list_head smk_rules; /* per task access rules */
+ struct mutex smk_rules_lock; /* lock for the rules */
};
#define SMK_INODE_INSTANT 0x01 /* inode is instantiated */
@@ -152,12 +155,6 @@ struct smack_known {
#define SMACK_MAGIC 0x43415d53 /* "SMAC" */
/*
- * A limit on the number of entries in the lists
- * makes some of the list administration easier.
- */
-#define SMACK_LIST_MAX 10000
-
-/*
* CIPSO defaults.
*/
#define SMACK_CIPSO_DOI_DEFAULT 3 /* Historical */
@@ -174,9 +171,7 @@ struct smack_known {
/*
* Just to make the common cases easier to deal with
*/
-#define MAY_ANY (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
#define MAY_ANYREAD (MAY_READ | MAY_EXEC)
-#define MAY_ANYWRITE (MAY_WRITE | MAY_APPEND)
#define MAY_READWRITE (MAY_READ | MAY_WRITE)
#define MAY_NOT 0
@@ -202,7 +197,7 @@ struct inode_smack *new_inode_smack(char *);
/*
* These functions are in smack_access.c
*/
-int smk_access_entry(char *, char *);
+int smk_access_entry(char *, char *, struct list_head *);
int smk_access(char *, char *, int, struct smk_audit_info *);
int smk_curacc(char *, u32, struct smk_audit_info *);
int smack_to_cipso(const char *, struct smack_cipso *);
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 7ba8478f599e..86453db4333d 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -70,10 +70,11 @@ int log_policy = SMACK_AUDIT_DENIED;
* smk_access_entry - look up matching access rule
* @subject_label: a pointer to the subject's Smack label
* @object_label: a pointer to the object's Smack label
+ * @rule_list: the list of rules to search
*
* This function looks up the subject/object pair in the
- * access rule list and returns pointer to the matching rule if found,
- * NULL otherwise.
+ * access rule list and returns the access mode. If no
+ * entry is found returns -ENOENT.
*
* NOTE:
* Even though Smack labels are usually shared on smack_list
@@ -85,13 +86,13 @@ int log_policy = SMACK_AUDIT_DENIED;
* will be on the list, so checking the pointers may be a worthwhile
* optimization.
*/
-int smk_access_entry(char *subject_label, char *object_label)
+int smk_access_entry(char *subject_label, char *object_label,
+ struct list_head *rule_list)
{
- u32 may = MAY_NOT;
+ int may = -ENOENT;
struct smack_rule *srp;
- rcu_read_lock();
- list_for_each_entry_rcu(srp, &smack_rule_list, list) {
+ list_for_each_entry_rcu(srp, rule_list, list) {
if (srp->smk_subject == subject_label ||
strcmp(srp->smk_subject, subject_label) == 0) {
if (srp->smk_object == object_label ||
@@ -101,7 +102,6 @@ int smk_access_entry(char *subject_label, char *object_label)
}
}
}
- rcu_read_unlock();
return may;
}
@@ -129,7 +129,7 @@ int smk_access_entry(char *subject_label, char *object_label)
int smk_access(char *subject_label, char *object_label, int request,
struct smk_audit_info *a)
{
- u32 may = MAY_NOT;
+ int may = MAY_NOT;
int rc = 0;
/*
@@ -181,13 +181,14 @@ int smk_access(char *subject_label, char *object_label, int request,
* Beyond here an explicit relationship is required.
* If the requested access is contained in the available
* access (e.g. read is included in readwrite) it's
- * good.
- */
- may = smk_access_entry(subject_label, object_label);
- /*
- * This is a bit map operation.
+ * good. A negative response from smk_access_entry()
+ * indicates there is no entry for this pair.
*/
- if ((request & may) == request)
+ rcu_read_lock();
+ may = smk_access_entry(subject_label, object_label, &smack_rule_list);
+ rcu_read_unlock();
+
+ if (may > 0 && (request & may) == request)
goto out_audit;
rc = -EACCES;
@@ -212,12 +213,27 @@ out_audit:
*/
int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
{
+ struct task_smack *tsp = current_security();
+ char *sp = smk_of_task(tsp);
+ int may;
int rc;
- char *sp = smk_of_current();
+ /*
+ * Check the global rule list
+ */
rc = smk_access(sp, obj_label, mode, NULL);
- if (rc == 0)
- goto out_audit;
+ if (rc == 0) {
+ /*
+ * If there is an entry in the task's rule list
+ * it can further restrict access.
+ */
+ may = smk_access_entry(sp, obj_label, &tsp->smk_rules);
+ if (may < 0)
+ goto out_audit;
+ if ((mode & may) == mode)
+ goto out_audit;
+ rc = -EACCES;
+ }
/*
* Return if a specific label has been designated as the
@@ -228,7 +244,7 @@ int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a)
goto out_audit;
if (capable(CAP_MAC_OVERRIDE))
- return 0;
+ rc = 0;
out_audit:
#ifdef CONFIG_AUDIT
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 533bf3255d7f..23c7a6d0c80c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -33,6 +33,7 @@
#include <net/cipso_ipv4.h>
#include <linux/audit.h>
#include <linux/magic.h>
+#include <linux/dcache.h>
#include "smack.h"
#define task_security(task) (task_cred_xxx((task), security))
@@ -84,6 +85,56 @@ struct inode_smack *new_inode_smack(char *smack)
return isp;
}
+/**
+ * new_task_smack - allocate a task security blob
+ * @task: Smack label stored as the blob's smk_task
+ * @forked: Smack label stored as the blob's smk_forked
+ * @gfp: allocation flags handed to kzalloc()
+ *
+ * Returns the new blob, with an empty rule list and an initialised
+ * rule lock, or NULL if there's no memory available
+ */
+static struct task_smack *new_task_smack(char *task, char *forked, gfp_t gfp)
+{
+	struct task_smack *blob = kzalloc(sizeof(*blob), gfp);
+
+	if (blob != NULL) {
+		blob->smk_task = task;
+		blob->smk_forked = forked;
+		INIT_LIST_HEAD(&blob->smk_rules);
+		mutex_init(&blob->smk_rules_lock);
+	}
+	return blob;
+}
+
+/**
+ * smk_copy_rules - copy a rule set
+ * @nhead: list head that receives the copies; initialised here
+ * @ohead: list head of the rules to copy
+ * @gfp: allocation flags for each copied rule
+ *
+ * Returns 0 on success, -ENOMEM on error; rules copied before the
+ * failure are left on @nhead
+ */
+static int smk_copy_rules(struct list_head *nhead, struct list_head *ohead,
+				gfp_t gfp)
+{
+	struct smack_rule *src;
+	struct smack_rule *dup;
+
+	INIT_LIST_HEAD(nhead);
+
+	list_for_each_entry_rcu(src, ohead, list) {
+		dup = kzalloc(sizeof(struct smack_rule), gfp);
+		if (dup == NULL)
+			return -ENOMEM;
+		/* struct copy; list_add_rcu() then relinks dup->list */
+		*dup = *src;
+		list_add_rcu(&dup->list, nhead);
+	}
+	return 0;
+}
+
/*
* LSM hooks.
* We he, that is fun!
@@ -102,23 +153,17 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode)
{
int rc;
struct smk_audit_info ad;
- char *sp, *tsp;
+ char *tsp;
rc = cap_ptrace_access_check(ctp, mode);
if (rc != 0)
return rc;
- sp = smk_of_current();
tsp = smk_of_task(task_security(ctp));
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
smk_ad_setfield_u_tsk(&ad, ctp);
- /* we won't log here, because rc can be overriden */
- rc = smk_access(sp, tsp, MAY_READWRITE, NULL);
- if (rc != 0 && capable(CAP_MAC_OVERRIDE))
- rc = 0;
-
- smack_log(sp, tsp, MAY_READWRITE, rc, &ad);
+ rc = smk_curacc(tsp, MAY_READWRITE, &ad);
return rc;
}
@@ -134,23 +179,17 @@ static int smack_ptrace_traceme(struct task_struct *ptp)
{
int rc;
struct smk_audit_info ad;
- char *sp, *tsp;
+ char *tsp;
rc = cap_ptrace_traceme(ptp);
if (rc != 0)
return rc;
+ tsp = smk_of_task(task_security(ptp));
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
smk_ad_setfield_u_tsk(&ad, ptp);
- sp = smk_of_current();
- tsp = smk_of_task(task_security(ptp));
- /* we won't log here, because rc can be overriden */
- rc = smk_access(tsp, sp, MAY_READWRITE, NULL);
- if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE))
- rc = 0;
-
- smack_log(tsp, sp, MAY_READWRITE, rc, &ad);
+ rc = smk_curacc(tsp, MAY_READWRITE, &ad);
return rc;
}
@@ -463,6 +502,7 @@ static void smack_inode_free_security(struct inode *inode)
* smack_inode_init_security - copy out the smack from an inode
* @inode: the inode
* @dir: unused
+ * @qstr: unused
* @name: where to put the attribute name
* @value: where to put the attribute value
* @len: where to put the length of the attribute
@@ -470,11 +510,12 @@ static void smack_inode_free_security(struct inode *inode)
* Returns 0 if it all works out, -ENOMEM if there's no memory
*/
static int smack_inode_init_security(struct inode *inode, struct inode *dir,
- char **name, void **value, size_t *len)
+ const struct qstr *qstr, char **name,
+ void **value, size_t *len)
{
char *isp = smk_of_inode(inode);
char *dsp = smk_of_inode(dir);
- u32 may;
+ int may;
if (name) {
*name = kstrdup(XATTR_SMACK_SUFFIX, GFP_KERNEL);
@@ -483,14 +524,17 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir,
}
if (value) {
- may = smk_access_entry(smk_of_current(), dsp);
+ rcu_read_lock();
+ may = smk_access_entry(smk_of_current(), dsp, &smack_rule_list);
+ rcu_read_unlock();
/*
* If the access rule allows transmutation and
* the directory requests transmutation then
* by all means transmute.
*/
- if (((may & MAY_TRANSMUTE) != 0) && smk_inode_transmutable(dir))
+ if (may > 0 && ((may & MAY_TRANSMUTE) != 0) &&
+ smk_inode_transmutable(dir))
isp = dsp;
*value = kstrdup(isp, GFP_KERNEL);
@@ -716,7 +760,8 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
if (strcmp(name, XATTR_NAME_SMACK) == 0 ||
strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
- strcmp(name, XATTR_NAME_SMACKEXEC) == 0) {
+ strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
+ strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
if (!capable(CAP_MAC_ADMIN))
rc = -EPERM;
/*
@@ -773,6 +818,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
isp->smk_task = nsp;
else
isp->smk_task = smack_known_invalid.smk_known;
+ } else if (strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
+ nsp = smk_import(value, size);
+ if (nsp != NULL)
+ isp->smk_mmap = nsp;
+ else
+ isp->smk_mmap = smack_known_invalid.smk_known;
} else if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0)
isp->smk_flags |= SMK_INODE_TRANSMUTE;
@@ -815,7 +866,8 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
strcmp(name, XATTR_NAME_SMACKIPIN) == 0 ||
strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 ||
strcmp(name, XATTR_NAME_SMACKEXEC) == 0 ||
- strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
+ strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0 ||
+ strcmp(name, XATTR_NAME_SMACKMMAP) == 0) {
if (!capable(CAP_MAC_ADMIN))
rc = -EPERM;
} else
@@ -829,6 +881,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
if (rc == 0) {
isp = dentry->d_inode->i_security;
isp->smk_task = NULL;
+ isp->smk_mmap = NULL;
}
return rc;
@@ -1060,6 +1113,126 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
}
/**
+ * smack_file_mmap - check permissions for a mmap operation
+ * @file: file to map (may be NULL, e.g. when mapping anonymous memory)
+ * @reqprot: protection requested by the application
+ * @prot: protection that will be applied by the kernel
+ * @flags: operational flags
+ * @addr: address, handed to cap_file_mmap()
+ * @addr_only: if set, only the cap_file_mmap() check is made
+ * Returns 0 if permission is granted, -EACCES or the cap_file_mmap() error.
+ */
+static int smack_file_mmap(struct file *file,
+ unsigned long reqprot, unsigned long prot,
+ unsigned long flags, unsigned long addr,
+ unsigned long addr_only)
+{
+ struct smack_rule *srp;
+ struct task_smack *tsp;
+ char *sp;
+ char *msmack;
+ char *osmack;
+ struct inode_smack *isp;
+ struct dentry *dp;
+ int may;
+ int mmay;
+ int tmay;
+ int rc;
+
+ /* do DAC check on address space usage */
+ rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+ if (rc || addr_only)
+ return rc;
+
+ if (file == NULL || file->f_dentry == NULL)
+ return 0;
+
+ dp = file->f_dentry;
+
+ if (dp->d_inode == NULL)
+ return 0;
+
+ isp = dp->d_inode->i_security;
+ if (isp->smk_mmap == NULL)
+ return 0;
+ msmack = isp->smk_mmap;
+
+ tsp = current_security();
+ sp = smk_of_current();
+ rc = 0;
+
+ rcu_read_lock();
+ /*
+ * For each Smack rule associated with the subject
+ * label verify that the SMACK64MMAP also has access
+ * to that rule's object label.
+ *
+ * Because neither of the labels comes
+ * from the networking code it is sufficient
+ * to compare pointers.
+ */
+ list_for_each_entry_rcu(srp, &smack_rule_list, list) {
+ if (srp->smk_subject != sp)
+ continue;
+
+ osmack = srp->smk_object;
+ /*
+ * Matching labels always allows access.
+ */
+ if (msmack == osmack)
+ continue;
+ /*
+ * If there is a matching local rule take
+ * that into account as well.
+ */
+ may = smk_access_entry(srp->smk_subject, osmack,
+ &tsp->smk_rules);
+ if (may == -ENOENT)
+ may = srp->smk_access;
+ else
+ may &= srp->smk_access;
+ /*
+ * If may is zero the SMACK64MMAP subject can't
+ * possibly have less access.
+ */
+ if (may == 0)
+ continue;
+
+ /*
+ * Fetch the global list entry.
+ * If there isn't one a SMACK64MMAP subject
+ * can't have as much access as current.
+ */
+ mmay = smk_access_entry(msmack, osmack, &smack_rule_list);
+ if (mmay == -ENOENT) {
+ rc = -EACCES;
+ break;
+ }
+ /*
+ * If there is a local entry it modifies the
+ * potential access, too.
+ */
+ tmay = smk_access_entry(msmack, osmack, &tsp->smk_rules);
+ if (tmay != -ENOENT)
+ mmay &= tmay;
+
+ /*
+ * If there is any access available to current that is
+ * not available to a SMACK64MMAP subject
+ * deny access.
+ */
+ if ((may | mmay) != mmay) {
+ rc = -EACCES;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return rc;
+}
+
+/**
* smack_file_set_fowner - set the file security blob value
* @file: object in question
*
@@ -1095,6 +1268,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk,
* struct fown_struct is never outside the context of a struct file
*/
file = container_of(fown, struct file, f_owner);
+
/* we don't log here as rc can be overriden */
rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL);
if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
@@ -1145,9 +1319,14 @@ static int smack_file_receive(struct file *file)
*/
static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
{
- cred->security = kzalloc(sizeof(struct task_smack), gfp);
- if (cred->security == NULL)
+ struct task_smack *tsp;
+
+ tsp = new_task_smack(NULL, NULL, gfp);
+ if (tsp == NULL)
return -ENOMEM;
+
+ cred->security = tsp;
+
return 0;
}
@@ -1156,13 +1335,24 @@ static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
* smack_cred_free - "free" task-level security credentials
* @cred: the credentials in question
*
- * Smack isn't using copies of blobs. Everyone
- * points to an immutable list. The blobs never go away.
- * There is no leak here.
*/
static void smack_cred_free(struct cred *cred)
{
- kfree(cred->security);
+ struct task_smack *tsp = cred->security;
+ struct smack_rule *rp;
+ struct list_head *l;
+ struct list_head *n;
+
+ if (tsp == NULL)
+ return;
+ cred->security = NULL;
+
+ list_for_each_safe(l, n, &tsp->smk_rules) {
+ rp = list_entry(l, struct smack_rule, list);
+ list_del(&rp->list);
+ kfree(rp);
+ }
+ kfree(tsp);
}
/**
@@ -1178,13 +1368,16 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old,
{
struct task_smack *old_tsp = old->security;
struct task_smack *new_tsp;
+ int rc;
- new_tsp = kzalloc(sizeof(struct task_smack), gfp);
+ new_tsp = new_task_smack(old_tsp->smk_task, old_tsp->smk_task, gfp);
if (new_tsp == NULL)
return -ENOMEM;
- new_tsp->smk_task = old_tsp->smk_task;
- new_tsp->smk_forked = old_tsp->smk_task;
- new->security = new_tsp;
+ /* attach first so the cred's free hook can reclaim a partial copy */
+ new->security = new_tsp;
+ rc = smk_copy_rules(&new_tsp->smk_rules, &old_tsp->smk_rules, gfp);
+ if (rc != 0)
+ return rc;
return 0;
}
@@ -1203,6 +1396,11 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
new_tsp->smk_task = old_tsp->smk_task;
new_tsp->smk_forked = old_tsp->smk_task;
+ mutex_init(&new_tsp->smk_rules_lock);
+ INIT_LIST_HEAD(&new_tsp->smk_rules);
+
+
+ /* cbs copy rule list */
}
/**
@@ -2419,6 +2617,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
}
}
isp->smk_task = smk_fetch(XATTR_NAME_SMACKEXEC, inode, dp);
+ isp->smk_mmap = smk_fetch(XATTR_NAME_SMACKMMAP, inode, dp);
dput(dp);
break;
@@ -2478,6 +2677,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value)
static int smack_setprocattr(struct task_struct *p, char *name,
void *value, size_t size)
{
+ int rc;
struct task_smack *tsp;
struct task_smack *oldtsp;
struct cred *new;
@@ -2513,13 +2713,16 @@ static int smack_setprocattr(struct task_struct *p, char *name,
new = prepare_creds();
if (new == NULL)
return -ENOMEM;
- tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL);
+
+ tsp = new_task_smack(newsmack, oldtsp->smk_forked, GFP_KERNEL);
if (tsp == NULL) {
kfree(new);
return -ENOMEM;
}
- tsp->smk_task = newsmack;
- tsp->smk_forked = oldtsp->smk_forked;
+ rc = smk_copy_rules(&tsp->smk_rules, &oldtsp->smk_rules, GFP_KERNEL);
+ if (rc != 0)
+ return rc;
+
new->security = tsp;
commit_creds(new);
return size;
@@ -3221,6 +3424,7 @@ struct security_operations smack_ops = {
.file_ioctl = smack_file_ioctl,
.file_lock = smack_file_lock,
.file_fcntl = smack_file_fcntl,
+ .file_mmap = smack_file_mmap,
.file_set_fowner = smack_file_set_fowner,
.file_send_sigiotask = smack_file_send_sigiotask,
.file_receive = smack_file_receive,
@@ -3334,23 +3538,20 @@ static __init int smack_init(void)
struct cred *cred;
struct task_smack *tsp;
- tsp = kzalloc(sizeof(struct task_smack), GFP_KERNEL);
+ if (!security_module_enable(&smack_ops))
+ return 0;
+
+ tsp = new_task_smack(smack_known_floor.smk_known,
+ smack_known_floor.smk_known, GFP_KERNEL);
if (tsp == NULL)
return -ENOMEM;
- if (!security_module_enable(&smack_ops)) {
- kfree(tsp);
- return 0;
- }
-
printk(KERN_INFO "Smack: Initializing.\n");
/*
* Set the security state for the initial task.
*/
cred = (struct cred *) current->cred;
- tsp->smk_forked = smack_known_floor.smk_known;
- tsp->smk_task = smack_known_floor.smk_known;
cred->security = tsp;
/* initialize the smack_know_list */
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 362d5eda948b..90d1bbaaa6f3 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -43,6 +43,7 @@ enum smk_inos {
SMK_NETLBLADDR = 8, /* single label hosts */
SMK_ONLYCAP = 9, /* the only "capable" label */
SMK_LOGGING = 10, /* logging */
+ SMK_LOAD_SELF = 11, /* task specific rules */
};
/*
@@ -135,104 +136,30 @@ static void smk_netlabel_audit_set(struct netlbl_audit *nap)
#define SMK_NETLBLADDRMIN 9
#define SMK_NETLBLADDRMAX 42
-/*
- * Seq_file read operations for /smack/load
- */
-
-static void *load_seq_start(struct seq_file *s, loff_t *pos)
-{
- if (*pos == SEQ_READ_FINISHED)
- return NULL;
- if (list_empty(&smack_rule_list))
- return NULL;
- return smack_rule_list.next;
-}
-
-static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct list_head *list = v;
-
- if (list_is_last(list, &smack_rule_list)) {
- *pos = SEQ_READ_FINISHED;
- return NULL;
- }
- return list->next;
-}
-
-static int load_seq_show(struct seq_file *s, void *v)
-{
- struct list_head *list = v;
- struct smack_rule *srp =
- list_entry(list, struct smack_rule, list);
-
- seq_printf(s, "%s %s", (char *)srp->smk_subject,
- (char *)srp->smk_object);
-
- seq_putc(s, ' ');
-
- if (srp->smk_access & MAY_READ)
- seq_putc(s, 'r');
- if (srp->smk_access & MAY_WRITE)
- seq_putc(s, 'w');
- if (srp->smk_access & MAY_EXEC)
- seq_putc(s, 'x');
- if (srp->smk_access & MAY_APPEND)
- seq_putc(s, 'a');
- if (srp->smk_access & MAY_TRANSMUTE)
- seq_putc(s, 't');
- if (srp->smk_access == 0)
- seq_putc(s, '-');
-
- seq_putc(s, '\n');
-
- return 0;
-}
-
-static void load_seq_stop(struct seq_file *s, void *v)
-{
- /* No-op */
-}
-
-static const struct seq_operations load_seq_ops = {
- .start = load_seq_start,
- .next = load_seq_next,
- .show = load_seq_show,
- .stop = load_seq_stop,
-};
-
-/**
- * smk_open_load - open() for /smack/load
- * @inode: inode structure representing file
- * @file: "load" file pointer
- *
- * For reading, use load_seq_* seq_file reading operations.
- */
-static int smk_open_load(struct inode *inode, struct file *file)
-{
- return seq_open(file, &load_seq_ops);
-}
-
/**
* smk_set_access - add a rule to the rule list
* @srp: the new rule to add
+ * @rule_list: the list of rules
+ * @rule_lock: the rule list lock
*
* Looks through the current subject/object/access list for
* the subject/object pair and replaces the access that was
* there. If the pair isn't found add it with the specified
* access.
*
+ * Returns 1 if a rule was found to exist already, 0 if it is new
* Returns 0 if nothing goes wrong or -ENOMEM if it fails
* during the allocation of the new pair to add.
*/
-static int smk_set_access(struct smack_rule *srp)
+static int smk_set_access(struct smack_rule *srp, struct list_head *rule_list,
+ struct mutex *rule_lock)
{
struct smack_rule *sp;
- int ret = 0;
- int found;
- mutex_lock(&smack_list_lock);
+ int found = 0;
- found = 0;
- list_for_each_entry_rcu(sp, &smack_rule_list, list) {
+ mutex_lock(rule_lock);
+
+ list_for_each_entry_rcu(sp, rule_list, list) {
if (sp->smk_subject == srp->smk_subject &&
sp->smk_object == srp->smk_object) {
found = 1;
@@ -241,19 +168,21 @@ static int smk_set_access(struct smack_rule *srp)
}
}
if (found == 0)
- list_add_rcu(&srp->list, &smack_rule_list);
+ list_add_rcu(&srp->list, rule_list);
- mutex_unlock(&smack_list_lock);
+ mutex_unlock(rule_lock);
- return ret;
+ return found;
}
/**
- * smk_write_load - write() for /smack/load
+ * smk_write_load_list - write() for any /smack/load
* @file: file pointer, not actually used
* @buf: where to get the data from
* @count: bytes sent
* @ppos: where to start - must be 0
+ * @rule_list: the list of rules to write to
+ * @rule_lock: lock for the rule list
*
* Get one smack access rule from above.
* The format is exactly:
@@ -263,21 +192,19 @@ static int smk_set_access(struct smack_rule *srp)
*
* writes must be SMK_LABELLEN+SMK_LABELLEN+SMK_ACCESSLEN bytes.
*/
-static ssize_t smk_write_load(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+static ssize_t smk_write_load_list(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos,
+ struct list_head *rule_list,
+ struct mutex *rule_lock)
{
struct smack_rule *rule;
char *data;
int rc = -EINVAL;
/*
- * Must have privilege.
* No partial writes.
* Enough data must be present.
*/
- if (!capable(CAP_MAC_ADMIN))
- return -EPERM;
-
if (*ppos != 0)
return -EINVAL;
/*
@@ -372,11 +299,13 @@ static ssize_t smk_write_load(struct file *file, const char __user *buf,
goto out_free_rule;
}
- rc = smk_set_access(rule);
-
- if (!rc)
- rc = count;
- goto out;
+ rc = count;
+ /*
+ * smk_set_access returns true if there was already a rule
+ * for the subject/object pair, and false if it was new.
+ */
+ if (!smk_set_access(rule, rule_list, rule_lock))
+ goto out;
out_free_rule:
kfree(rule);
@@ -385,6 +314,108 @@ out:
return rc;
}
+
+/*
+ * Seq_file read operations for /smack/load
+ */
+
+static void *load_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos == SEQ_READ_FINISHED)
+ return NULL;
+ if (list_empty(&smack_rule_list))
+ return NULL;
+ return smack_rule_list.next;
+}
+
+static void *load_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct list_head *list = v;
+
+ if (list_is_last(list, &smack_rule_list)) {
+ *pos = SEQ_READ_FINISHED;
+ return NULL;
+ }
+ return list->next;
+}
+
+static int load_seq_show(struct seq_file *s, void *v)
+{
+ struct list_head *list = v;
+ struct smack_rule *srp =
+ list_entry(list, struct smack_rule, list);
+
+ seq_printf(s, "%s %s", (char *)srp->smk_subject,
+ (char *)srp->smk_object);
+
+ seq_putc(s, ' ');
+
+ if (srp->smk_access & MAY_READ)
+ seq_putc(s, 'r');
+ if (srp->smk_access & MAY_WRITE)
+ seq_putc(s, 'w');
+ if (srp->smk_access & MAY_EXEC)
+ seq_putc(s, 'x');
+ if (srp->smk_access & MAY_APPEND)
+ seq_putc(s, 'a');
+ if (srp->smk_access & MAY_TRANSMUTE)
+ seq_putc(s, 't');
+ if (srp->smk_access == 0)
+ seq_putc(s, '-');
+
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static void load_seq_stop(struct seq_file *s, void *v)
+{
+ /* No-op */
+}
+
+static const struct seq_operations load_seq_ops = {
+ .start = load_seq_start,
+ .next = load_seq_next,
+ .show = load_seq_show,
+ .stop = load_seq_stop,
+};
+
+/**
+ * smk_open_load - open() for /smack/load
+ * @inode: inode structure representing file
+ * @file: "load" file pointer
+ *
+ * For reading, use load_seq_* seq_file reading operations.
+ */
+static int smk_open_load(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &load_seq_ops);
+}
+
+/**
+ * smk_write_load - write() for /smack/load
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start - must be 0
+ *
+ */
+static ssize_t smk_write_load(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+
+ /*
+ * Must have privilege.
+ * Partial writes and short data are rejected by
+ * smk_write_load_list().
+ */
+ if (!capable(CAP_MAC_ADMIN))
+ return -EPERM;
+
+ return smk_write_load_list(file, buf, count, ppos, &smack_rule_list,
+ &smack_list_lock);
+}
+
static const struct file_operations smk_load_ops = {
.open = smk_open_load,
.read = seq_read,
@@ -1288,6 +1319,112 @@ static const struct file_operations smk_logging_ops = {
.write = smk_write_logging,
.llseek = default_llseek,
};
+
+/*
+ * Seq_file read operations for /smack/load-self
+ */
+
+static void *load_self_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct task_smack *tsp = current_security();
+
+ if (*pos == SEQ_READ_FINISHED)
+ return NULL;
+ if (list_empty(&tsp->smk_rules))
+ return NULL;
+ return tsp->smk_rules.next;
+}
+
+static void *load_self_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct task_smack *tsp = current_security();
+ struct list_head *list = v;
+
+ if (list_is_last(list, &tsp->smk_rules)) {
+ *pos = SEQ_READ_FINISHED;
+ return NULL;
+ }
+ return list->next;
+}
+
+static int load_self_seq_show(struct seq_file *s, void *v)
+{
+ struct list_head *list = v;
+ struct smack_rule *srp =
+ list_entry(list, struct smack_rule, list);
+
+ seq_printf(s, "%s %s", (char *)srp->smk_subject,
+ (char *)srp->smk_object);
+
+ seq_putc(s, ' ');
+
+ if (srp->smk_access & MAY_READ)
+ seq_putc(s, 'r');
+ if (srp->smk_access & MAY_WRITE)
+ seq_putc(s, 'w');
+ if (srp->smk_access & MAY_EXEC)
+ seq_putc(s, 'x');
+ if (srp->smk_access & MAY_APPEND)
+ seq_putc(s, 'a');
+ if (srp->smk_access & MAY_TRANSMUTE)
+ seq_putc(s, 't');
+ if (srp->smk_access == 0)
+ seq_putc(s, '-');
+
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static void load_self_seq_stop(struct seq_file *s, void *v)
+{
+ /* No-op */
+}
+
+static const struct seq_operations load_self_seq_ops = {
+ .start = load_self_seq_start,
+ .next = load_self_seq_next,
+ .show = load_self_seq_show,
+ .stop = load_self_seq_stop,
+};
+
+
+/**
+ * smk_open_load_self - open() for /smack/load-self
+ * @inode: inode structure representing file
+ * @file: "load-self" file pointer
+ *
+ * For reading, use load_self_seq_* seq_file reading operations.
+ */
+static int smk_open_load_self(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &load_self_seq_ops);
+}
+
+/**
+ * smk_write_load_self - write() for /smack/load-self
+ * @file: file pointer, not actually used
+ * @buf: where to get the data from
+ * @count: bytes sent
+ * @ppos: where to start - must be 0
+ *
+ */
+static ssize_t smk_write_load_self(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_smack *tsp = current_security();
+
+ return smk_write_load_list(file, buf, count, ppos, &tsp->smk_rules,
+ &tsp->smk_rules_lock);
+}
+
+static const struct file_operations smk_load_self_ops = {
+ .open = smk_open_load_self,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .write = smk_write_load_self,
+ .release = seq_release,
+};
/**
* smk_fill_super - fill the /smackfs superblock
* @sb: the empty superblock
@@ -1304,23 +1441,26 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root_inode;
static struct tree_descr smack_files[] = {
- [SMK_LOAD] =
- {"load", &smk_load_ops, S_IRUGO|S_IWUSR},
- [SMK_CIPSO] =
- {"cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR},
- [SMK_DOI] =
- {"doi", &smk_doi_ops, S_IRUGO|S_IWUSR},
- [SMK_DIRECT] =
- {"direct", &smk_direct_ops, S_IRUGO|S_IWUSR},
- [SMK_AMBIENT] =
- {"ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
- [SMK_NETLBLADDR] =
- {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
- [SMK_ONLYCAP] =
- {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
- [SMK_LOGGING] =
- {"logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
- /* last one */ {""}
+ [SMK_LOAD] = {
+ "load", &smk_load_ops, S_IRUGO|S_IWUSR},
+ [SMK_CIPSO] = {
+ "cipso", &smk_cipso_ops, S_IRUGO|S_IWUSR},
+ [SMK_DOI] = {
+ "doi", &smk_doi_ops, S_IRUGO|S_IWUSR},
+ [SMK_DIRECT] = {
+ "direct", &smk_direct_ops, S_IRUGO|S_IWUSR},
+ [SMK_AMBIENT] = {
+ "ambient", &smk_ambient_ops, S_IRUGO|S_IWUSR},
+ [SMK_NETLBLADDR] = {
+ "netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR},
+ [SMK_ONLYCAP] = {
+ "onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR},
+ [SMK_LOGGING] = {
+ "logging", &smk_logging_ops, S_IRUGO|S_IWUSR},
+ [SMK_LOAD_SELF] = {
+ "load-self", &smk_load_self_ops, S_IRUGO|S_IWUGO},
+ /* last one */
+ {""}
};
rc = simple_fill_super(sb, SMACK_MAGIC, smack_files);
diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c
index 9d32f182301e..cb09f1fce910 100644
--- a/security/tomoyo/file.c
+++ b/security/tomoyo/file.c
@@ -927,7 +927,7 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
struct path *path, const int flag)
{
const u8 acc_mode = ACC_MODE(flag);
- int error = -ENOMEM;
+ int error = 0;
struct tomoyo_path_info buf;
struct tomoyo_request_info r;
int idx;
@@ -938,9 +938,6 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain,
buf.name = NULL;
r.mode = TOMOYO_CONFIG_DISABLED;
idx = tomoyo_read_lock();
- if (!tomoyo_get_realpath(&buf, path))
- goto out;
- error = 0;
/*
* If the filename is specified by "deny_rewrite" keyword,
* we need to check "allow_rewrite" permission when the filename is not
diff --git a/sound/core/jack.c b/sound/core/jack.c
index 4902ae568730..53b53e97c896 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -141,6 +141,7 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
fail_input:
input_free_device(jack->input_dev);
+ kfree(jack->id);
kfree(jack);
return err;
}
diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c
index 23f49f356e0f..16c0bdfbb164 100644
--- a/sound/pci/au88x0/au88x0_core.c
+++ b/sound/pci/au88x0/au88x0_core.c
@@ -1252,11 +1252,19 @@ static void vortex_adbdma_resetup(vortex_t *vortex, int adbdma) {
static int inline vortex_adbdma_getlinearpos(vortex_t * vortex, int adbdma)
{
stream_t *dma = &vortex->dma_adb[adbdma];
- int temp;
+ int temp, page, delta;
temp = hwread(vortex->mmio, VORTEX_ADBDMA_STAT + (adbdma << 2));
- temp = (dma->period_virt * dma->period_bytes) + (temp & (dma->period_bytes - 1));
- return temp;
+ page = (temp & ADB_SUBBUF_MASK) >> ADB_SUBBUF_SHIFT;
+ if (dma->nr_periods >= 4)
+ delta = (page - dma->period_real) & 3;
+ else {
+ delta = (page - dma->period_real);
+ if (delta < 0)
+ delta += dma->nr_periods;
+ }
+ return (dma->period_virt + delta) * dma->period_bytes
+ + (temp & (dma->period_bytes - 1));
}
static void vortex_adbdma_startfifo(vortex_t * vortex, int adbdma)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0baffcdee8f9..fcedad9a5fef 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2308,6 +2308,7 @@ static struct snd_pci_quirk position_fix_list[] __devinitdata = {
SND_PCI_QUIRK(0x1043, 0x813d, "ASUS P5AD2", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1043, 0x81b3, "ASUS", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1043, 0x81e7, "ASUS M2V", POS_FIX_LPIB),
+ SND_PCI_QUIRK(0x1043, 0x8410, "ASUS", POS_FIX_LPIB),
SND_PCI_QUIRK(0x104d, 0x9069, "Sony VPCS11V9E", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1106, 0x3288, "ASUS M2V-MX SE", POS_FIX_LPIB),
SND_PCI_QUIRK(0x1179, 0xff10, "Toshiba A100-259", POS_FIX_LPIB),
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a07b031090d8..067982f4f182 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -1039,9 +1039,11 @@ static struct hda_verb cs_errata_init_verbs[] = {
{0x11, AC_VERB_SET_PROC_COEF, 0x0008},
{0x11, AC_VERB_SET_PROC_STATE, 0x00},
+#if 0 /* Don't set to D3 as we are in power-up sequence */
{0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */
{0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */
/*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */
+#endif
{} /* terminator */
};
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index fbe97d32140d..4d5004e693f0 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3114,6 +3114,8 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0401, "Dell Vostro 1014", CXT5066_DELL_VOSTRO),
SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTRO),
SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD),
+ SND_PCI_QUIRK(0x1028, 0x050f, "Dell Inspiron", CXT5066_IDEAPAD),
+ SND_PCI_QUIRK(0x1028, 0x0510, "Dell Vostro", CXT5066_IDEAPAD),
SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP),
SND_PCI_QUIRK(0x1043, 0x13f3, "Asus A52J", CXT5066_ASUS),
SND_PCI_QUIRK(0x1043, 0x1643, "Asus K52JU", CXT5066_ASUS),
@@ -3410,7 +3412,7 @@ static void cx_auto_parse_output(struct hda_codec *codec)
}
}
spec->multiout.dac_nids = spec->private_dac_nids;
- spec->multiout.max_channels = nums * 2;
+ spec->multiout.max_channels = spec->multiout.num_dacs * 2;
if (cfg->hp_outs > 0)
spec->auto_mute = 1;
@@ -3729,9 +3731,9 @@ static int cx_auto_init(struct hda_codec *codec)
return 0;
}
-static int cx_auto_add_volume(struct hda_codec *codec, const char *basename,
+static int cx_auto_add_volume_idx(struct hda_codec *codec, const char *basename,
const char *dir, int cidx,
- hda_nid_t nid, int hda_dir)
+ hda_nid_t nid, int hda_dir, int amp_idx)
{
static char name[32];
static struct snd_kcontrol_new knew[] = {
@@ -3743,7 +3745,8 @@ static int cx_auto_add_volume(struct hda_codec *codec, const char *basename,
for (i = 0; i < 2; i++) {
struct snd_kcontrol *kctl;
- knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, 3, 0, hda_dir);
+ knew[i].private_value = HDA_COMPOSE_AMP_VAL(nid, 3, amp_idx,
+ hda_dir);
knew[i].subdevice = HDA_SUBDEV_AMP_FLAG;
knew[i].index = cidx;
snprintf(name, sizeof(name), "%s%s %s", basename, dir, sfx[i]);
@@ -3759,6 +3762,9 @@ static int cx_auto_add_volume(struct hda_codec *codec, const char *basename,
return 0;
}
+#define cx_auto_add_volume(codec, str, dir, cidx, nid, hda_dir) \
+ cx_auto_add_volume_idx(codec, str, dir, cidx, nid, hda_dir, 0)
+
#define cx_auto_add_pb_volume(codec, nid, str, idx) \
cx_auto_add_volume(codec, str, " Playback", idx, nid, HDA_OUTPUT)
@@ -3808,29 +3814,60 @@ static int cx_auto_build_input_controls(struct hda_codec *codec)
struct conexant_spec *spec = codec->spec;
struct auto_pin_cfg *cfg = &spec->autocfg;
static const char *prev_label;
- int i, err, cidx;
+ int i, err, cidx, conn_len;
+ hda_nid_t conn[HDA_MAX_CONNECTIONS];
+
+ int multi_adc_volume = 0; /* If the ADC nid has several input volumes */
+ int adc_nid = spec->adc_nids[0];
+
+ conn_len = snd_hda_get_connections(codec, adc_nid, conn,
+ HDA_MAX_CONNECTIONS);
+ if (conn_len < 0)
+ return conn_len;
+
+ multi_adc_volume = cfg->num_inputs > 1 && conn_len > 1;
+ if (!multi_adc_volume) {
+ err = cx_auto_add_volume(codec, "Capture", "", 0, adc_nid,
+ HDA_INPUT);
+ if (err < 0)
+ return err;
+ }
- err = cx_auto_add_volume(codec, "Capture", "", 0, spec->adc_nids[0],
- HDA_INPUT);
- if (err < 0)
- return err;
prev_label = NULL;
cidx = 0;
for (i = 0; i < cfg->num_inputs; i++) {
hda_nid_t nid = cfg->inputs[i].pin;
const char *label;
- if (!(get_wcaps(codec, nid) & AC_WCAP_IN_AMP))
+ int j;
+ int pin_amp = get_wcaps(codec, nid) & AC_WCAP_IN_AMP;
+ if (!pin_amp && !multi_adc_volume)
continue;
+
label = hda_get_autocfg_input_label(codec, cfg, i);
if (label == prev_label)
cidx++;
else
cidx = 0;
prev_label = label;
- err = cx_auto_add_volume(codec, label, " Capture", cidx,
- nid, HDA_INPUT);
- if (err < 0)
- return err;
+
+ if (pin_amp) {
+ err = cx_auto_add_volume(codec, label, " Boost", cidx,
+ nid, HDA_INPUT);
+ if (err < 0)
+ return err;
+ }
+
+ if (!multi_adc_volume)
+ continue;
+ for (j = 0; j < conn_len; j++) {
+ if (conn[j] == nid) {
+ err = cx_auto_add_volume_idx(codec, label,
+ " Capture", cidx, adc_nid, HDA_INPUT, j);
+ if (err < 0)
+ return err;
+ break;
+ }
+ }
}
return 0;
}
@@ -3902,6 +3939,8 @@ static struct hda_codec_preset snd_hda_preset_conexant[] = {
.patch = patch_cxt5066 },
{ .id = 0x14f15069, .name = "CX20585",
.patch = patch_cxt5066 },
+ { .id = 0x14f1506e, .name = "CX20590",
+ .patch = patch_cxt5066 },
{ .id = 0x14f15097, .name = "CX20631",
.patch = patch_conexant_auto },
{ .id = 0x14f15098, .name = "CX20632",
@@ -3928,6 +3967,7 @@ MODULE_ALIAS("snd-hda-codec-id:14f15066");
MODULE_ALIAS("snd-hda-codec-id:14f15067");
MODULE_ALIAS("snd-hda-codec-id:14f15068");
MODULE_ALIAS("snd-hda-codec-id:14f15069");
+MODULE_ALIAS("snd-hda-codec-id:14f1506e");
MODULE_ALIAS("snd-hda-codec-id:14f15097");
MODULE_ALIAS("snd-hda-codec-id:14f15098");
MODULE_ALIAS("snd-hda-codec-id:14f150a1");
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index a58767736727..ec0fa2dd0a27 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1634,6 +1634,9 @@ static struct hda_codec_preset snd_hda_preset_hdmi[] = {
{ .id = 0x10de0012, .name = "GPU 12 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
{ .id = 0x10de0013, .name = "GPU 13 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
{ .id = 0x10de0014, .name = "GPU 14 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
+{ .id = 0x10de0015, .name = "GPU 15 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
+{ .id = 0x10de0016, .name = "GPU 16 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
+/* 17 is known to be absent */
{ .id = 0x10de0018, .name = "GPU 18 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
{ .id = 0x10de0019, .name = "GPU 19 HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
{ .id = 0x10de001a, .name = "GPU 1a HDMI/DP", .patch = patch_nvhdmi_8ch_89 },
@@ -1676,6 +1679,8 @@ MODULE_ALIAS("snd-hda-codec-id:10de0011");
MODULE_ALIAS("snd-hda-codec-id:10de0012");
MODULE_ALIAS("snd-hda-codec-id:10de0013");
MODULE_ALIAS("snd-hda-codec-id:10de0014");
+MODULE_ALIAS("snd-hda-codec-id:10de0015");
+MODULE_ALIAS("snd-hda-codec-id:10de0016");
MODULE_ALIAS("snd-hda-codec-id:10de0018");
MODULE_ALIAS("snd-hda-codec-id:10de0019");
MODULE_ALIAS("snd-hda-codec-id:10de001a");
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3328a259a242..4261bb8eec1d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1133,11 +1133,8 @@ static void alc_automute_speaker(struct hda_codec *codec, int pinctl)
nid = spec->autocfg.hp_pins[i];
if (!nid)
break;
- if (snd_hda_jack_detect(codec, nid)) {
- spec->jack_present = 1;
- break;
- }
- alc_report_jack(codec, spec->autocfg.hp_pins[i]);
+ alc_report_jack(codec, nid);
+ spec->jack_present |= snd_hda_jack_detect(codec, nid);
}
mute = spec->jack_present ? HDA_AMP_MUTE : 0;
@@ -15015,7 +15012,7 @@ static struct snd_pci_quirk alc269_cfg_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x11e3, "ASUS U33Jc", ALC269VB_AMIC),
SND_PCI_QUIRK(0x1043, 0x1273, "ASUS UL80Jt", ALC269VB_AMIC),
SND_PCI_QUIRK(0x1043, 0x1283, "ASUS U53Jc", ALC269_AMIC),
- SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82Jv", ALC269_AMIC),
+ SND_PCI_QUIRK(0x1043, 0x12b3, "ASUS N82JV", ALC269VB_AMIC),
SND_PCI_QUIRK(0x1043, 0x12d3, "ASUS N61Jv", ALC269_AMIC),
SND_PCI_QUIRK(0x1043, 0x13a3, "ASUS UL30Vt", ALC269_AMIC),
SND_PCI_QUIRK(0x1043, 0x1373, "ASUS G73JX", ALC269_AMIC),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index 9ea48b425d0b..bd7b123f6440 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -586,7 +586,12 @@ static hda_nid_t stac92hd83xxx_pin_nids[10] = {
0x0f, 0x10, 0x11, 0x1f, 0x20,
};
-static hda_nid_t stac92hd88xxx_pin_nids[10] = {
+static hda_nid_t stac92hd87xxx_pin_nids[6] = {
+ 0x0a, 0x0b, 0x0c, 0x0d,
+ 0x0f, 0x11,
+};
+
+static hda_nid_t stac92hd88xxx_pin_nids[8] = {
0x0a, 0x0b, 0x0c, 0x0d,
0x0f, 0x11, 0x1f, 0x20,
};
@@ -5430,12 +5435,13 @@ again:
switch (codec->vendor_id) {
case 0x111d76d1:
case 0x111d76d9:
+ case 0x111d76e5:
spec->dmic_nids = stac92hd87b_dmic_nids;
spec->num_dmics = stac92xx_connected_ports(codec,
stac92hd87b_dmic_nids,
STAC92HD87B_NUM_DMICS);
- spec->num_pins = ARRAY_SIZE(stac92hd88xxx_pin_nids);
- spec->pin_nids = stac92hd88xxx_pin_nids;
+ spec->num_pins = ARRAY_SIZE(stac92hd87xxx_pin_nids);
+ spec->pin_nids = stac92hd87xxx_pin_nids;
spec->mono_nid = 0;
spec->num_pwrs = 0;
break;
@@ -5443,6 +5449,7 @@ again:
case 0x111d7667:
case 0x111d7668:
case 0x111d7669:
+ case 0x111d76e3:
spec->num_dmics = stac92xx_connected_ports(codec,
stac92hd88xxx_dmic_nids,
STAC92HD88XXX_NUM_DMICS);
@@ -6387,6 +6394,8 @@ static struct hda_codec_preset snd_hda_preset_sigmatel[] = {
{ .id = 0x111d76cd, .name = "92HD89F2", .patch = patch_stac92hd73xx },
{ .id = 0x111d76ce, .name = "92HD89F1", .patch = patch_stac92hd73xx },
{ .id = 0x111d76e0, .name = "92HD91BXX", .patch = patch_stac92hd83xxx},
+ { .id = 0x111d76e3, .name = "92HD98BXX", .patch = patch_stac92hd83xxx},
+ { .id = 0x111d76e5, .name = "92HD99BXX", .patch = patch_stac92hd83xxx},
{ .id = 0x111d76e7, .name = "92HD90BXX", .patch = patch_stac92hd83xxx},
{} /* terminator */
};
diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index a76c3260d941..63b0054200a8 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -567,7 +567,7 @@ static void via_auto_init_analog_input(struct hda_codec *codec)
hda_nid_t nid = cfg->inputs[i].pin;
if (spec->smart51_enabled && is_smart51_pins(spec, nid))
ctl = PIN_OUT;
- else if (i == AUTO_PIN_MIC)
+ else if (cfg->inputs[i].type == AUTO_PIN_MIC)
ctl = PIN_VREF50;
else
ctl = PIN_IN;
diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c
index bb4bf65b9e7e..0bb424af956f 100644
--- a/sound/soc/codecs/cx20442.c
+++ b/sound/soc/codecs/cx20442.c
@@ -367,7 +367,7 @@ static int cx20442_codec_remove(struct snd_soc_codec *codec)
return 0;
}
-static const u8 cx20442_reg = CX20442_TELOUT | CX20442_MIC;
+static const u8 cx20442_reg;
static struct snd_soc_codec_driver cx20442_codec_dev = {
.probe = cx20442_codec_probe,
diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c
index 987476a5895f..017d99ceb42e 100644
--- a/sound/soc/codecs/wm8903.c
+++ b/sound/soc/codecs/wm8903.c
@@ -1482,7 +1482,7 @@ int wm8903_mic_detect(struct snd_soc_codec *codec, struct snd_soc_jack *jack,
WM8903_MICDET_EINT | WM8903_MICSHRT_EINT,
irq_mask);
- if (det && shrt) {
+ if (det || shrt) {
/* Enable mic detection, this may not have been set through
* platform data (eg, if the defaults are OK). */
snd_soc_update_bits(codec, WM8903_WRITE_SEQUENCER_0,
diff --git a/sound/soc/codecs/wm8903.h b/sound/soc/codecs/wm8903.h
index e8490f3edd03..e3ec2433b215 100644
--- a/sound/soc/codecs/wm8903.h
+++ b/sound/soc/codecs/wm8903.h
@@ -165,7 +165,7 @@ extern int wm8903_mic_detect(struct snd_soc_codec *codec,
#define WM8903_VMID_RES_50K 2
#define WM8903_VMID_RES_250K 3
-#define WM8903_VMID_RES_5K 4
+#define WM8903_VMID_RES_5K 6
/*
* R8 (0x08) - Analogue DAC 0
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 4bbc3442703f..8dfb0a0da673 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -145,18 +145,18 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
SOC_SINGLE("DAC Playback Limiter Threshold",
WM8978_DAC_LIMITER_2, 4, 7, 0),
SOC_SINGLE("DAC Playback Limiter Boost",
- WM8978_DAC_LIMITER_2, 0, 15, 0),
+ WM8978_DAC_LIMITER_2, 0, 12, 0),
SOC_ENUM("ALC Enable Switch", alc1),
SOC_SINGLE("ALC Capture Min Gain", WM8978_ALC_CONTROL_1, 0, 7, 0),
SOC_SINGLE("ALC Capture Max Gain", WM8978_ALC_CONTROL_1, 3, 7, 0),
- SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 7, 0),
+ SOC_SINGLE("ALC Capture Hold", WM8978_ALC_CONTROL_2, 4, 10, 0),
SOC_SINGLE("ALC Capture Target", WM8978_ALC_CONTROL_2, 0, 15, 0),
SOC_ENUM("ALC Capture Mode", alc3),
- SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 15, 0),
- SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 15, 0),
+ SOC_SINGLE("ALC Capture Decay", WM8978_ALC_CONTROL_3, 4, 10, 0),
+ SOC_SINGLE("ALC Capture Attack", WM8978_ALC_CONTROL_3, 0, 10, 0),
SOC_SINGLE("ALC Capture Noise Gate Switch", WM8978_NOISE_GATE, 3, 1, 0),
SOC_SINGLE("ALC Capture Noise Gate Threshold",
@@ -211,8 +211,10 @@ static const struct snd_kcontrol_new wm8978_snd_controls[] = {
WM8978_LOUT2_SPK_CONTROL, WM8978_ROUT2_SPK_CONTROL, 6, 1, 1),
/* DAC / ADC oversampling */
- SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL, 8, 1, 0),
- SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL, 8, 1, 0),
+ SOC_SINGLE("DAC 128x Oversampling Switch", WM8978_DAC_CONTROL,
+ 5, 1, 0),
+ SOC_SINGLE("ADC 128x Oversampling Switch", WM8978_ADC_CONTROL,
+ 5, 1, 0),
};
/* Mixer #1: Output (OUT1, OUT2) Mixer: mix AUX, Input mixer output and DAC */
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index 37b8aa8a680f..c6c958ee5d59 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -107,6 +107,12 @@ struct wm8994_priv {
int revision;
struct wm8994_pdata *pdata;
+
+ unsigned int aif1clk_enable:1;
+ unsigned int aif2clk_enable:1;
+
+ unsigned int aif1clk_disable:1;
+ unsigned int aif2clk_disable:1;
};
static int wm8994_readable(unsigned int reg)
@@ -1004,6 +1010,110 @@ static void wm8994_update_class_w(struct snd_soc_codec *codec)
}
}
+static int late_enable_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_codec *codec = w->codec;
+ struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ if (wm8994->aif1clk_enable) {
+ snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
+ WM8994_AIF1CLK_ENA_MASK,
+ WM8994_AIF1CLK_ENA);
+ wm8994->aif1clk_enable = 0;
+ }
+ if (wm8994->aif2clk_enable) {
+ snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
+ WM8994_AIF2CLK_ENA_MASK,
+ WM8994_AIF2CLK_ENA);
+ wm8994->aif2clk_enable = 0;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int late_disable_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_codec *codec = w->codec;
+ struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+ switch (event) {
+ case SND_SOC_DAPM_POST_PMD:
+ if (wm8994->aif1clk_disable) {
+ snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1,
+ WM8994_AIF1CLK_ENA_MASK, 0);
+ wm8994->aif1clk_disable = 0;
+ }
+ if (wm8994->aif2clk_disable) {
+ snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1,
+ WM8994_AIF2CLK_ENA_MASK, 0);
+ wm8994->aif2clk_disable = 0;
+ }
+ break;
+ }
+
+ return 0;
+}
+
+static int aif1clk_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_codec *codec = w->codec;
+ struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ wm8994->aif1clk_enable = 1;
+ break;
+ case SND_SOC_DAPM_POST_PMD:
+ wm8994->aif1clk_disable = 1;
+ break;
+ }
+
+ return 0;
+}
+
+static int aif2clk_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_codec *codec = w->codec;
+ struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
+
+ switch (event) {
+ case SND_SOC_DAPM_PRE_PMU:
+ wm8994->aif2clk_enable = 1;
+ break;
+ case SND_SOC_DAPM_POST_PMD:
+ wm8994->aif2clk_disable = 1;
+ break;
+ }
+
+ return 0;
+}
+
+static int adc_mux_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ late_enable_ev(w, kcontrol, event);
+ return 0;
+}
+
+static int dac_ev(struct snd_soc_dapm_widget *w,
+ struct snd_kcontrol *kcontrol, int event)
+{
+ struct snd_soc_codec *codec = w->codec;
+ unsigned int mask = 1 << w->shift;
+
+ snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
+ mask, mask);
+ return 0;
+}
+
static const char *hp_mux_text[] = {
"Mixer",
"DAC",
@@ -1272,6 +1382,59 @@ static const struct soc_enum aif2dacr_src_enum =
static const struct snd_kcontrol_new aif2dacr_src_mux =
SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum);
+static const struct snd_soc_dapm_widget wm8994_lateclk_revd_widgets[] = {
+SND_SOC_DAPM_SUPPLY("AIF1CLK", SND_SOC_NOPM, 0, 0, aif1clk_ev,
+ SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+SND_SOC_DAPM_SUPPLY("AIF2CLK", SND_SOC_NOPM, 0, 0, aif2clk_ev,
+ SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+
+SND_SOC_DAPM_PGA_E("Late DAC1L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
+ late_enable_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("Late DAC1R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
+ late_enable_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("Late DAC2L Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
+ late_enable_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_PGA_E("Late DAC2R Enable PGA", SND_SOC_NOPM, 0, 0, NULL, 0,
+ late_enable_ev, SND_SOC_DAPM_PRE_PMU),
+
+SND_SOC_DAPM_POST("Late Disable PGA", late_disable_ev)
+};
+
+static const struct snd_soc_dapm_widget wm8994_lateclk_widgets[] = {
+SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
+SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0)
+};
+
+static const struct snd_soc_dapm_widget wm8994_dac_revd_widgets[] = {
+SND_SOC_DAPM_DAC_E("DAC2L", NULL, SND_SOC_NOPM, 3, 0,
+ dac_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_DAC_E("DAC2R", NULL, SND_SOC_NOPM, 2, 0,
+ dac_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_DAC_E("DAC1L", NULL, SND_SOC_NOPM, 1, 0,
+ dac_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_DAC_E("DAC1R", NULL, SND_SOC_NOPM, 0, 0,
+ dac_ev, SND_SOC_DAPM_PRE_PMU),
+};
+
+static const struct snd_soc_dapm_widget wm8994_dac_widgets[] = {
+SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
+SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
+SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
+SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
+};
+
+static const struct snd_soc_dapm_widget wm8994_adc_revd_widgets[] = {
+SND_SOC_DAPM_MUX_E("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux,
+ adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
+SND_SOC_DAPM_MUX_E("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux,
+ adc_mux_ev, SND_SOC_DAPM_PRE_PMU),
+};
+
+static const struct snd_soc_dapm_widget wm8994_adc_widgets[] = {
+SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
+SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
+};
+
static const struct snd_soc_dapm_widget wm8994_dapm_widgets[] = {
SND_SOC_DAPM_INPUT("DMIC1DAT"),
SND_SOC_DAPM_INPUT("DMIC2DAT"),
@@ -1284,9 +1447,6 @@ SND_SOC_DAPM_SUPPLY("DSP1CLK", WM8994_CLOCKING_1, 3, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("DSP2CLK", WM8994_CLOCKING_1, 2, 0, NULL, 0),
SND_SOC_DAPM_SUPPLY("DSPINTCLK", WM8994_CLOCKING_1, 1, 0, NULL, 0),
-SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, NULL, 0),
-SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, NULL, 0),
-
SND_SOC_DAPM_AIF_OUT("AIF1ADC1L", NULL,
0, WM8994_POWER_MANAGEMENT_4, 9, 0),
SND_SOC_DAPM_AIF_OUT("AIF1ADC1R", NULL,
@@ -1369,14 +1529,6 @@ SND_SOC_DAPM_ADC("DMIC1R", NULL, WM8994_POWER_MANAGEMENT_4, 2, 0),
SND_SOC_DAPM_ADC("ADCL", NULL, SND_SOC_NOPM, 1, 0),
SND_SOC_DAPM_ADC("ADCR", NULL, SND_SOC_NOPM, 0, 0),
-SND_SOC_DAPM_MUX("ADCL Mux", WM8994_POWER_MANAGEMENT_4, 1, 0, &adcl_mux),
-SND_SOC_DAPM_MUX("ADCR Mux", WM8994_POWER_MANAGEMENT_4, 0, 0, &adcr_mux),
-
-SND_SOC_DAPM_DAC("DAC2L", NULL, WM8994_POWER_MANAGEMENT_5, 3, 0),
-SND_SOC_DAPM_DAC("DAC2R", NULL, WM8994_POWER_MANAGEMENT_5, 2, 0),
-SND_SOC_DAPM_DAC("DAC1L", NULL, WM8994_POWER_MANAGEMENT_5, 1, 0),
-SND_SOC_DAPM_DAC("DAC1R", NULL, WM8994_POWER_MANAGEMENT_5, 0, 0),
-
SND_SOC_DAPM_MUX("Left Headphone Mux", SND_SOC_NOPM, 0, 0, &hpl_mux),
SND_SOC_DAPM_MUX("Right Headphone Mux", SND_SOC_NOPM, 0, 0, &hpr_mux),
@@ -1516,14 +1668,12 @@ static const struct snd_soc_dapm_route intercon[] = {
{ "AIF2ADC Mux", "AIF3DACDAT", "AIF3ADCDAT" },
/* DAC1 inputs */
- { "DAC1L", NULL, "DAC1L Mixer" },
{ "DAC1L Mixer", "AIF2 Switch", "AIF2DACL" },
{ "DAC1L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
{ "DAC1L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
{ "DAC1L Mixer", "Left Sidetone Switch", "Left Sidetone" },
{ "DAC1L Mixer", "Right Sidetone Switch", "Right Sidetone" },
- { "DAC1R", NULL, "DAC1R Mixer" },
{ "DAC1R Mixer", "AIF2 Switch", "AIF2DACR" },
{ "DAC1R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
{ "DAC1R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1532,7 +1682,6 @@ static const struct snd_soc_dapm_route intercon[] = {
/* DAC2/AIF2 outputs */
{ "AIF2ADCL", NULL, "AIF2DAC2L Mixer" },
- { "DAC2L", NULL, "AIF2DAC2L Mixer" },
{ "AIF2DAC2L Mixer", "AIF2 Switch", "AIF2DACL" },
{ "AIF2DAC2L Mixer", "AIF1.2 Switch", "AIF1DAC2L" },
{ "AIF2DAC2L Mixer", "AIF1.1 Switch", "AIF1DAC1L" },
@@ -1540,7 +1689,6 @@ static const struct snd_soc_dapm_route intercon[] = {
{ "AIF2DAC2L Mixer", "Right Sidetone Switch", "Right Sidetone" },
{ "AIF2ADCR", NULL, "AIF2DAC2R Mixer" },
- { "DAC2R", NULL, "AIF2DAC2R Mixer" },
{ "AIF2DAC2R Mixer", "AIF2 Switch", "AIF2DACR" },
{ "AIF2DAC2R Mixer", "AIF1.2 Switch", "AIF1DAC2R" },
{ "AIF2DAC2R Mixer", "AIF1.1 Switch", "AIF1DAC1R" },
@@ -1584,6 +1732,24 @@ static const struct snd_soc_dapm_route intercon[] = {
{ "Right Headphone Mux", "DAC", "DAC1R" },
};
+static const struct snd_soc_dapm_route wm8994_lateclk_revd_intercon[] = {
+ { "DAC1L", NULL, "Late DAC1L Enable PGA" },
+ { "Late DAC1L Enable PGA", NULL, "DAC1L Mixer" },
+ { "DAC1R", NULL, "Late DAC1R Enable PGA" },
+ { "Late DAC1R Enable PGA", NULL, "DAC1R Mixer" },
+ { "DAC2L", NULL, "Late DAC2L Enable PGA" },
+ { "Late DAC2L Enable PGA", NULL, "AIF2DAC2L Mixer" },
+ { "DAC2R", NULL, "Late DAC2R Enable PGA" },
+ { "Late DAC2R Enable PGA", NULL, "AIF2DAC2R Mixer" }
+};
+
+static const struct snd_soc_dapm_route wm8994_lateclk_intercon[] = {
+ { "DAC1L", NULL, "DAC1L Mixer" },
+ { "DAC1R", NULL, "DAC1R Mixer" },
+ { "DAC2L", NULL, "AIF2DAC2L Mixer" },
+ { "DAC2R", NULL, "AIF2DAC2R Mixer" },
+};
+
static const struct snd_soc_dapm_route wm8994_revd_intercon[] = {
{ "AIF1DACDAT", NULL, "AIF2DACDAT" },
{ "AIF2DACDAT", NULL, "AIF1DACDAT" },
@@ -2514,6 +2680,22 @@ static int wm8994_resume(struct snd_soc_codec *codec)
{
struct wm8994_priv *wm8994 = snd_soc_codec_get_drvdata(codec);
int i, ret;
+ unsigned int val, mask;
+
+ if (wm8994->revision < 4) {
+ /* force a HW read */
+ val = wm8994_reg_read(codec->control_data,
+ WM8994_POWER_MANAGEMENT_5);
+
+ /* modify the cache only */
+ codec->cache_only = 1;
+ mask = WM8994_DAC1R_ENA | WM8994_DAC1L_ENA |
+ WM8994_DAC2R_ENA | WM8994_DAC2L_ENA;
+ val &= mask;
+ snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5,
+ mask, val);
+ codec->cache_only = 0;
+ }
/* Restore the registers */
ret = snd_soc_cache_sync(codec);
@@ -2847,11 +3029,10 @@ static void wm8958_default_micdet(u16 status, void *data)
report |= SND_JACK_BTN_5;
done:
- snd_soc_jack_report(wm8994->micdet[0].jack,
+ snd_soc_jack_report(wm8994->micdet[0].jack, report,
SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2 |
SND_JACK_BTN_3 | SND_JACK_BTN_4 | SND_JACK_BTN_5 |
- SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT,
- report);
+ SND_JACK_MICROPHONE | SND_JACK_VIDEOOUT);
}
/**
@@ -3125,10 +3306,31 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
case WM8994:
snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets,
ARRAY_SIZE(wm8994_specific_dapm_widgets));
+ if (wm8994->revision < 4) {
+ snd_soc_dapm_new_controls(dapm, wm8994_lateclk_revd_widgets,
+ ARRAY_SIZE(wm8994_lateclk_revd_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_adc_revd_widgets,
+ ARRAY_SIZE(wm8994_adc_revd_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_dac_revd_widgets,
+ ARRAY_SIZE(wm8994_dac_revd_widgets));
+ } else {
+ snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
+ ARRAY_SIZE(wm8994_lateclk_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
+ ARRAY_SIZE(wm8994_adc_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
+ ARRAY_SIZE(wm8994_dac_widgets));
+ }
break;
case WM8958:
snd_soc_add_controls(codec, wm8958_snd_controls,
ARRAY_SIZE(wm8958_snd_controls));
+ snd_soc_dapm_new_controls(dapm, wm8994_lateclk_widgets,
+ ARRAY_SIZE(wm8994_lateclk_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_adc_widgets,
+ ARRAY_SIZE(wm8994_adc_widgets));
+ snd_soc_dapm_new_controls(dapm, wm8994_dac_widgets,
+ ARRAY_SIZE(wm8994_dac_widgets));
snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
ARRAY_SIZE(wm8958_dapm_widgets));
break;
@@ -3143,12 +3345,19 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec)
snd_soc_dapm_add_routes(dapm, wm8994_intercon,
ARRAY_SIZE(wm8994_intercon));
- if (wm8994->revision < 4)
+ if (wm8994->revision < 4) {
snd_soc_dapm_add_routes(dapm, wm8994_revd_intercon,
ARRAY_SIZE(wm8994_revd_intercon));
-
+ snd_soc_dapm_add_routes(dapm, wm8994_lateclk_revd_intercon,
+ ARRAY_SIZE(wm8994_lateclk_revd_intercon));
+ } else {
+ snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
+ ARRAY_SIZE(wm8994_lateclk_intercon));
+ }
break;
case WM8958:
+ snd_soc_dapm_add_routes(dapm, wm8994_lateclk_intercon,
+ ARRAY_SIZE(wm8994_lateclk_intercon));
snd_soc_dapm_add_routes(dapm, wm8958_intercon,
ARRAY_SIZE(wm8958_intercon));
break;
diff --git a/sound/soc/codecs/wm9081.c b/sound/soc/codecs/wm9081.c
index 43825b2102a5..cce704c275c6 100644
--- a/sound/soc/codecs/wm9081.c
+++ b/sound/soc/codecs/wm9081.c
@@ -15,6 +15,7 @@
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/device.h>
#include <linux/pm.h>
#include <linux/i2c.h>
#include <linux/platform_device.h>
@@ -1341,6 +1342,10 @@ static __devinit int wm9081_i2c_probe(struct i2c_client *i2c,
wm9081->control_type = SND_SOC_I2C;
wm9081->control_data = i2c;
+ if (dev_get_platdata(&i2c->dev))
+ memcpy(&wm9081->retune, dev_get_platdata(&i2c->dev),
+ sizeof(wm9081->retune));
+
ret = snd_soc_register_codec(&i2c->dev,
&soc_codec_dev_wm9081, &wm9081_dai, 1);
if (ret < 0)
diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c
index 613df5db0b32..516892706063 100644
--- a/sound/soc/codecs/wm_hubs.c
+++ b/sound/soc/codecs/wm_hubs.c
@@ -674,6 +674,9 @@ SND_SOC_DAPM_OUTPUT("LINEOUT2N"),
};
static const struct snd_soc_dapm_route analogue_routes[] = {
+ { "MICBIAS1", NULL, "CLK_SYS" },
+ { "MICBIAS2", NULL, "CLK_SYS" },
+
{ "IN1L PGA", "IN1LP Switch", "IN1LP" },
{ "IN1L PGA", "IN1LN Switch", "IN1LN" },
diff --git a/sound/soc/imx/eukrea-tlv320.c b/sound/soc/imx/eukrea-tlv320.c
index e20c9e1457c0..1e9bccae4e80 100644
--- a/sound/soc/imx/eukrea-tlv320.c
+++ b/sound/soc/imx/eukrea-tlv320.c
@@ -79,7 +79,7 @@ static struct snd_soc_dai_link eukrea_tlv320_dai = {
.name = "tlv320aic23",
.stream_name = "TLV320AIC23",
.codec_dai_name = "tlv320aic23-hifi",
- .platform_name = "imx-pcm-audio.0",
+ .platform_name = "imx-fiq-pcm-audio.0",
.codec_name = "tlv320aic23-codec.0-001a",
.cpu_dai_name = "imx-ssi.0",
.ops = &eukrea_tlv320_snd_ops,
diff --git a/sound/soc/omap/am3517evm.c b/sound/soc/omap/am3517evm.c
index 161750443ebc..73dde4a1adc3 100644
--- a/sound/soc/omap/am3517evm.c
+++ b/sound/soc/omap/am3517evm.c
@@ -139,7 +139,7 @@ static struct snd_soc_dai_link am3517evm_dai = {
.cpu_dai_name ="omap-mcbsp-dai.0",
.codec_dai_name = "tlv320aic23-hifi",
.platform_name = "omap-pcm-audio",
- .codec_name = "tlv320aic23-codec",
+ .codec_name = "tlv320aic23-codec.2-001a",
.init = am3517evm_aic23_init,
.ops = &am3517evm_ops,
};
diff --git a/sound/soc/pxa/e740_wm9705.c b/sound/soc/pxa/e740_wm9705.c
index 28333e7d9c50..dc65650a6fa1 100644
--- a/sound/soc/pxa/e740_wm9705.c
+++ b/sound/soc/pxa/e740_wm9705.c
@@ -117,7 +117,7 @@ static struct snd_soc_dai_link e740_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9705-hifi",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9705-codec",
@@ -126,7 +126,7 @@ static struct snd_soc_dai_link e740_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name = "wm9705-aux",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e750_wm9705.c b/sound/soc/pxa/e750_wm9705.c
index 01bf31675c55..51897fcd911b 100644
--- a/sound/soc/pxa/e750_wm9705.c
+++ b/sound/soc/pxa/e750_wm9705.c
@@ -99,7 +99,7 @@ static struct snd_soc_dai_link e750_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9705-hifi",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9705-codec",
@@ -109,7 +109,7 @@ static struct snd_soc_dai_link e750_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name ="wm9705-aux",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9705-codec",
diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c
index c6a37c6ef23b..053ed208e59f 100644
--- a/sound/soc/pxa/e800_wm9712.c
+++ b/sound/soc/pxa/e800_wm9712.c
@@ -89,7 +89,7 @@ static struct snd_soc_dai_link e800_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9712-hifi",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
@@ -98,7 +98,7 @@ static struct snd_soc_dai_link e800_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name ="wm9712-aux",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c
index fc22e6eefc98..b13a4252812d 100644
--- a/sound/soc/pxa/em-x270.c
+++ b/sound/soc/pxa/em-x270.c
@@ -37,7 +37,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9712-hifi",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
@@ -45,7 +45,7 @@ static struct snd_soc_dai_link em_x270_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name ="wm9712-aux",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/mioa701_wm9713.c b/sound/soc/pxa/mioa701_wm9713.c
index 0d70fc8c12bd..38ca6759907e 100644
--- a/sound/soc/pxa/mioa701_wm9713.c
+++ b/sound/soc/pxa/mioa701_wm9713.c
@@ -162,7 +162,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9713-hifi",
.codec_name = "wm9713-codec",
.init = mioa701_wm9713_init,
@@ -172,7 +172,7 @@ static struct snd_soc_dai_link mioa701_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name ="wm9713-aux",
.codec_name = "wm9713-codec",
.platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c
index 857db96d4a4f..504e4004f004 100644
--- a/sound/soc/pxa/palm27x.c
+++ b/sound/soc/pxa/palm27x.c
@@ -132,7 +132,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
{
.name = "AC97 HiFi",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9712-hifi",
.codec_name = "wm9712-codec",
.platform_name = "pxa-pcm-audio",
@@ -141,7 +141,7 @@ static struct snd_soc_dai_link palm27x_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name = "wm9712-aux",
.codec_name = "wm9712-codec",
.platform_name = "pxa-pcm-audio",
diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c
index f75804ef0897..4b6e5d608b42 100644
--- a/sound/soc/pxa/tosa.c
+++ b/sound/soc/pxa/tosa.c
@@ -219,7 +219,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
{
.name = "AC97",
.stream_name = "AC97 HiFi",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_dai_name = "wm9712-hifi",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
@@ -229,7 +229,7 @@ static struct snd_soc_dai_link tosa_dai[] = {
{
.name = "AC97 Aux",
.stream_name = "AC97 Aux",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_dai_name = "wm9712-aux",
.platform_name = "pxa-pcm-audio",
.codec_name = "wm9712-codec",
diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c
index b222a7d72027..25bba108fea3 100644
--- a/sound/soc/pxa/zylonite.c
+++ b/sound/soc/pxa/zylonite.c
@@ -166,7 +166,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
.stream_name = "AC97 HiFi",
.codec_name = "wm9713-codec",
.platform_name = "pxa-pcm-audio",
- .cpu_dai_name = "pxa-ac97.0",
+ .cpu_dai_name = "pxa2xx-ac97",
.codec_name = "wm9713-hifi",
.init = zylonite_wm9713_init,
},
@@ -175,7 +175,7 @@ static struct snd_soc_dai_link zylonite_dai[] = {
.stream_name = "AC97 Aux",
.codec_name = "wm9713-codec",
.platform_name = "pxa-pcm-audio",
- .cpu_dai_name = "pxa-ac97.1",
+ .cpu_dai_name = "pxa2xx-ac97-aux",
.codec_name = "wm9713-aux",
},
{
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8194f150bab7..1790f83ee665 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -712,7 +712,15 @@ static int dapm_supply_check_power(struct snd_soc_dapm_widget *w)
!path->connected(path->source, path->sink))
continue;
- if (path->sink && path->sink->power_check &&
+ if (!path->sink)
+ continue;
+
+ if (path->sink->force) {
+ power = 1;
+ break;
+ }
+
+ if (path->sink->power_check &&
path->sink->power_check(path->sink)) {
power = 1;
break;
@@ -933,7 +941,7 @@ static void dapm_seq_run(struct snd_soc_dapm_context *dapm,
}
if (!list_empty(&pending))
- dapm_seq_run_coalesced(dapm, &pending);
+ dapm_seq_run_coalesced(cur_dapm, &pending);
}
static void dapm_widget_update(struct snd_soc_dapm_context *dapm)
@@ -1627,6 +1635,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_add_routes);
int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
{
struct snd_soc_dapm_widget *w;
+ unsigned int val;
list_for_each_entry(w, &dapm->card->widgets, list)
{
@@ -1675,6 +1684,18 @@ int snd_soc_dapm_new_widgets(struct snd_soc_dapm_context *dapm)
case snd_soc_dapm_post:
break;
}
+
+ /* Read the initial power state from the device */
+ if (w->reg >= 0) {
+ val = snd_soc_read(w->codec, w->reg);
+ val &= 1 << w->shift;
+ if (w->invert)
+ val = !val;
+
+ if (val)
+ w->power = 1;
+ }
+
w->new = 1;
}
diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c
index 68b97477577b..66eabafb1c24 100644
--- a/sound/usb/caiaq/audio.c
+++ b/sound/usb/caiaq/audio.c
@@ -785,7 +785,7 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev)
}
dev->pcm->private_data = dev;
- strcpy(dev->pcm->name, dev->product_name);
+ strlcpy(dev->pcm->name, dev->product_name, sizeof(dev->pcm->name));
memset(dev->sub_playback, 0, sizeof(dev->sub_playback));
memset(dev->sub_capture, 0, sizeof(dev->sub_capture));
diff --git a/sound/usb/caiaq/midi.c b/sound/usb/caiaq/midi.c
index 2f218c77fff2..a1a47088fd0c 100644
--- a/sound/usb/caiaq/midi.c
+++ b/sound/usb/caiaq/midi.c
@@ -136,7 +136,7 @@ int snd_usb_caiaq_midi_init(struct snd_usb_caiaqdev *device)
if (ret < 0)
return ret;
- strcpy(rmidi->name, device->product_name);
+ strlcpy(rmidi->name, device->product_name, sizeof(rmidi->name));
rmidi->info_flags = SNDRV_RAWMIDI_INFO_DUPLEX;
rmidi->private_data = device;
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 800f7cb4f251..c0f8270bc199 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -323,6 +323,7 @@ static int snd_usb_audio_create(struct usb_device *dev, int idx,
return -ENOMEM;
}
+ mutex_init(&chip->shutdown_mutex);
chip->index = idx;
chip->dev = dev;
chip->card = card;
@@ -531,6 +532,7 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
chip = ptr;
card = chip->card;
mutex_lock(&register_mutex);
+ mutex_lock(&chip->shutdown_mutex);
chip->shutdown = 1;
chip->num_interfaces--;
if (chip->num_interfaces <= 0) {
@@ -548,9 +550,11 @@ static void snd_usb_audio_disconnect(struct usb_device *dev, void *ptr)
snd_usb_mixer_disconnect(p);
}
usb_chip[chip->index] = NULL;
+ mutex_unlock(&chip->shutdown_mutex);
mutex_unlock(&register_mutex);
snd_card_free_when_closed(card);
} else {
+ mutex_unlock(&chip->shutdown_mutex);
mutex_unlock(&register_mutex);
}
}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 4132522ac90f..e3f680526cb5 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -361,6 +361,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
}
if (changed) {
+ mutex_lock(&subs->stream->chip->shutdown_mutex);
/* format changed */
snd_usb_release_substream_urbs(subs, 0);
/* influenced: period_bytes, channels, rate, format, */
@@ -368,6 +369,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream,
params_rate(hw_params),
snd_pcm_format_physical_width(params_format(hw_params)) *
params_channels(hw_params));
+ mutex_unlock(&subs->stream->chip->shutdown_mutex);
}
return ret;
@@ -385,8 +387,9 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
subs->cur_audiofmt = NULL;
subs->cur_rate = 0;
subs->period_bytes = 0;
- if (!subs->stream->chip->shutdown)
- snd_usb_release_substream_urbs(subs, 0);
+ mutex_lock(&subs->stream->chip->shutdown_mutex);
+ snd_usb_release_substream_urbs(subs, 0);
+ mutex_unlock(&subs->stream->chip->shutdown_mutex);
return snd_pcm_lib_free_vmalloc_buffer(substream);
}
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index db3eb21627ee..6e66fffe87f5 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -36,6 +36,7 @@ struct snd_usb_audio {
struct snd_card *card;
u32 usb_id;
int shutdown;
+ struct mutex shutdown_mutex;
unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */
int num_interfaces;
int num_suspended_intf;
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index cb43289e447f..416684be0ad3 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,4 +1,3 @@
-PERF-BUILD-OPTIONS
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index bd498d496952..4626a398836a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -178,8 +178,8 @@ install-pdf: pdf
$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
$(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
-install-html: html
- '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+#install-html: html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
$(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE
@@ -288,15 +288,16 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
mv $@+ $@
-install-webdoc : html
- '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+# UNIMPLEMENTED
+#install-webdoc : html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
-quick-install: quick-install-man
+# quick-install: quick-install-man
-quick-install-man:
- '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+# quick-install-man:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
-quick-install-html:
- '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
+#quick-install-html:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 399751befeed..7a527f7e9da9 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list'
+'perf list' [hw|sw|cache|tracepoint|event_glob]
DESCRIPTION
-----------
@@ -63,7 +63,26 @@ details. Some of them are referenced in the SEE ALSO section below.
OPTIONS
-------
-None
+
+Without options all known events will be listed.
+
+To limit the list use:
+
+. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
+
+. 'sw' or 'software' to list software events such as context switches, etc.
+
+. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
+
+. 'tracepoint' to list all tracepoint events; alternatively, use
+ 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
+ block, etc.
+
+. If none of the above is matched, it will apply the supplied glob to all
+ events, printing the ones that match.
+
+One or more types can be used at the same time, listing the events for the
+types specified.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 921de259ea10..4a26a2f3a6a3 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -24,8 +24,8 @@ and statistics with this 'perf lock' command.
'perf lock report' reports statistical data.
-OPTIONS
--------
+COMMON OPTIONS
+--------------
-i::
--input=<file>::
@@ -39,6 +39,14 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
+REPORT OPTIONS
+--------------
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: acquired (default), contended,
+ wait_total, wait_max, wait_min.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 86b797a35aa6..02bafce4b341 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -16,7 +16,7 @@ or
or
'perf probe' --list
or
-'perf probe' [options] --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
+'perf probe' [options] --line='LINE'
or
'perf probe' [options] --vars='PROBEPOINT'
@@ -73,6 +73,17 @@ OPTIONS
(Only for --vars) Show external defined variables in addition to local
variables.
+-F::
+--funcs::
+ Show available functions in given module or kernel.
+
+--filter=FILTER::
+ (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
+ patterns; see FILTER PATTERN for details.
+ Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
+ for --funcs.
+ If several filters are specified, only the last filter is used.
+
-f::
--force::
Forcibly add events with existing name.
@@ -117,13 +128,14 @@ LINE SYNTAX
-----------
Line range is described by following syntax.
- "FUNC[:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
+ "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
FUNC specifies the function name of showing lines. 'RLN' is the start line
number from function entry line, and 'RLN2' is the end line number. As same as
probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
and 'ALN2' is end line number in the file. It is also possible to specify how
-many lines to show by using 'NUM'.
+many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
+for searching a specific function when several functions share the same name.
So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
LAZY MATCHING
@@ -135,6 +147,14 @@ e.g.
This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
+FILTER PATTERN
+--------------
+ The filter pattern consists of one or more glob patterns used to filter variables.
+ In addition, you can use "!" to specify a filter-out rule. You can also combine several rules with "&" or "|", and group those rules into one rule by using "(" ")".
+
+e.g.
+ With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
+ With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" but do end with "bar", like "fizzbar"; "foobar" is filtered out.
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e032716c839b..5a520f825295 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -137,6 +137,17 @@ Do not update the builid cache. This saves some overhead in situations
where the information in the perf.data file (which includes buildids)
is sufficient.
+-G name,...::
+--cgroup name,...::
+Monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b6da7affbbee..918cc38ee6d1 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -83,6 +83,17 @@ This option is only valid in system-wide mode.
print counts using a CSV-style output to make it easy to import directly into
spreadsheets. Columns are separated by the string specified in SEP.
+-G name::
+--cgroup name::
+Monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line.
+
EXAMPLES
--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7141c42e1469..9b8421805c5c 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -3,7 +3,7 @@ ifeq ("$(origin O)", "command line")
endif
# The default target of this Makefile is...
-all::
+all:
ifneq ($(OUTPUT),)
# check that the output directory actually exists
@@ -11,152 +11,12 @@ OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
endif
-# Define V=1 to have a more verbose compile.
-# Define V=2 to have an even more verbose compile.
-#
-# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
-# or vsnprintf() return -1 instead of number of characters which would
-# have been written to the final string if enough space had been available.
-#
-# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds
-# when attempting to read from an fopen'ed directory.
-#
-# Define NO_OPENSSL environment variable if you do not have OpenSSL.
-# This also implies MOZILLA_SHA1.
-#
-# Define CURLDIR=/foo/bar if your curl header and library files are in
-# /foo/bar/include and /foo/bar/lib directories.
-#
-# Define EXPATDIR=/foo/bar if your expat header and library files are in
-# /foo/bar/include and /foo/bar/lib directories.
-#
-# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent.
-#
-# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks
-# d_type in struct dirent (latest Cygwin -- will be fixed soonish).
-#
-# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.)
-# do not support the 'size specifiers' introduced by C99, namely ll, hh,
-# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t).
-# some C compilers supported these specifiers prior to C99 as an extension.
-#
-# Define NO_STRCASESTR if you don't have strcasestr.
-#
-# Define NO_MEMMEM if you don't have memmem.
-#
-# Define NO_STRTOUMAX if you don't have strtoumax in the C library.
-# If your compiler also does not support long long or does not have
-# strtoull, define NO_STRTOULL.
-#
-# Define NO_SETENV if you don't have setenv in the C library.
-#
-# Define NO_UNSETENV if you don't have unsetenv in the C library.
-#
-# Define NO_MKDTEMP if you don't have mkdtemp in the C library.
-#
-# Define NO_SYS_SELECT_H if you don't have sys/select.h.
-#
-# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link.
-# Enable it on Windows. By default, symrefs are still used.
-#
-# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability
-# tests. These tests take up a significant amount of the total test time
-# but are not needed unless you plan to talk to SVN repos.
-#
-# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink
-# installed in /sw, but don't want PERF to link against any libraries
-# installed there. If defined you may specify your own (or Fink's)
-# include directories and library directories by defining CFLAGS
-# and LDFLAGS appropriately.
-#
-# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X,
-# have DarwinPorts installed in /opt/local, but don't want PERF to
-# link against any libraries installed there. If defined you may
-# specify your own (or DarwinPort's) include directories and
-# library directories by defining CFLAGS and LDFLAGS appropriately.
-#
-# Define PPC_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine optimized for PowerPC.
-#
-# Define ARM_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine optimized for ARM.
-#
-# Define MOZILLA_SHA1 environment variable when running make to make use of
-# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast
-# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default
-# choice) has very fast version optimized for i586.
-#
-# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin).
-#
-# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin).
-#
-# Define NEEDS_SOCKET if linking with libc is not enough (SunOS,
-# Patrick Mauritz).
-#
-# Define NO_MMAP if you want to avoid mmap.
-#
-# Define NO_PTHREADS if you do not have or do not want to use Pthreads.
-#
-# Define NO_PREAD if you have a problem with pread() system call (e.g.
-# cygwin.dll before v1.5.22).
-#
-# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is
-# generally faster on your platform than accessing the working directory.
-#
-# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support
-# the executable mode bit, but doesn't really do so.
-#
-# Define NO_IPV6 if you lack IPv6 support and getaddrinfo().
-#
-# Define NO_SOCKADDR_STORAGE if your platform does not have struct
-# sockaddr_storage.
-#
-# Define NO_ICONV if your libc does not properly support iconv.
-#
-# Define OLD_ICONV if your library has an old iconv(), where the second
-# (input buffer pointer) parameter is declared with type (const char **).
-#
-# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound.
-#
-# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib"
-# that tells runtime paths to dynamic libraries;
-# "-Wl,-rpath=/path/lib" is used instead.
-#
-# Define USE_NSEC below if you want perf to care about sub-second file mtimes
-# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and
-# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely
-# randomly break unless your underlying filesystem supports those sub-second
-# times (my ext3 doesn't).
-#
-# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of
-# "st_ctim"
-#
-# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec"
-# available. This automatically turns USE_NSEC off.
-#
-# Define USE_STDEV below if you want perf to care about the underlying device
-# change being considered an inode change from the update-index perspective.
-#
-# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks
-# field that counts the on-disk footprint in 512-byte blocks.
+# Define V to have a more verbose compile.
#
# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
#
# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
#
-# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's
-# MakeMaker (e.g. using ActiveState under Cygwin).
-#
-# Define NO_PERL if you do not want Perl scripts or libraries at all.
-#
-# Define INTERNAL_QSORT to use Git's implementation of qsort(), which
-# is a simplified version of the merge sort used in glibc. This is
-# recommended if Git triggers O(n^2) behavior in your platform's qsort().
-#
-# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call
-# your external grep (e.g., if your system lacks grep, if its grep is
-# broken, or spawning external process is slower than built-in grep perf has).
-#
# Define LDFLAGS=-static to build a static binary.
#
# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
@@ -167,12 +27,7 @@ $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
-include $(OUTPUT)PERF-VERSION-FILE
-uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
-uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
-uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not')
-uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
-uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
-uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
+uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
-e s/arm.*/arm/ -e s/sa110/arm/ \
@@ -191,8 +46,6 @@ ifeq ($(ARCH),x86_64)
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
endif
-# CFLAGS and LDFLAGS are for the users to override from the command line.
-
#
# Include saner warnings here, which can catch bugs:
#
@@ -270,22 +123,13 @@ CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
RM = rm -f
MKDIR = mkdir
-TAR = tar
FIND = find
INSTALL = install
-RPMBUILD = rpmbuild
-PTHREAD_LIBS = -lpthread
# sparse is architecture-neutral, which means that we need to tell it
# explicitly what architecture to check for. Fix this up for yours..
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-ifeq ($(V), 2)
- QUIET_STDERR = ">/dev/null"
-else
- QUIET_STDERR = ">/dev/null 2>&1"
-endif
-
-include feature-tests.mak
ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
@@ -310,49 +154,37 @@ BASIC_LDFLAGS =
# Guard against environment variables
BUILTIN_OBJS =
-BUILT_INS =
-COMPAT_CFLAGS =
-COMPAT_OBJS =
LIB_H =
LIB_OBJS =
-SCRIPT_PERL =
+PYRF_OBJS =
SCRIPT_SH =
-TEST_PROGRAMS =
SCRIPT_SH += perf-archive.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
+$(OUTPUT)python/perf.so: $(PYRF_OBJS)
+ $(QUIET_GEN)python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \
+ --build-temp='$(OUTPUT)python/temp'
#
# No Perl scripts right now:
#
-# SCRIPT_PERL += perf-add--interactive.perl
-
-SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \
- $(patsubst %.perl,%,$(SCRIPT_PERL))
-
-# Empty...
-EXTRA_PROGRAMS =
-
-# ... and all the rest that could be moved out of bindir to perfexecdir
-PROGRAMS += $(EXTRA_PROGRAMS)
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
#
# Single 'perf' binary right now:
#
PROGRAMS += $(OUTPUT)perf
-# List built-in command $C whose implementation cmd_$C() is not in
-# builtin-$C.o but is linked in as part of some other command.
-#
+LANG_BINDINGS =
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
# what 'all' will build but not install in perfexecdir
-OTHER_PROGRAMS = $(OUTPUT)perf$X
+OTHER_PROGRAMS = $(OUTPUT)perf
# Set paths to tools early so that they can be used for version tests.
ifndef SHELL_PATH
@@ -395,6 +227,7 @@ LIB_H += util/include/dwarf-regs.h
LIB_H += util/include/asm/dwarf2.h
LIB_H += util/include/asm/cpufeature.h
LIB_H += perf.h
+LIB_H += util/annotate.h
LIB_H += util/cache.h
LIB_H += util/callchain.h
LIB_H += util/build-id.h
@@ -402,6 +235,7 @@ LIB_H += util/debug.h
LIB_H += util/debugfs.h
LIB_H += util/event.h
LIB_H += util/evsel.h
+LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
@@ -416,6 +250,7 @@ LIB_H += util/help.h
LIB_H += util/session.h
LIB_H += util/strbuf.h
LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
LIB_H += util/svghelper.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
@@ -425,21 +260,26 @@ LIB_H += util/values.h
LIB_H += util/sort.h
LIB_H += util/hist.h
LIB_H += util/thread.h
+LIB_H += util/thread_map.h
LIB_H += util/trace-event.h
LIB_H += util/probe-finder.h
LIB_H += util/probe-event.h
LIB_H += util/pstack.h
LIB_H += util/cpumap.h
+LIB_H += util/top.h
LIB_H += $(ARCH_INCLUDE)
+LIB_H += util/cgroup.h
LIB_OBJS += $(OUTPUT)util/abspath.o
LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
LIB_OBJS += $(OUTPUT)util/build-id.o
LIB_OBJS += $(OUTPUT)util/config.o
LIB_OBJS += $(OUTPUT)util/ctype.o
LIB_OBJS += $(OUTPUT)util/debugfs.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
LIB_OBJS += $(OUTPUT)util/evsel.o
LIB_OBJS += $(OUTPUT)util/exec_cmd.o
LIB_OBJS += $(OUTPUT)util/help.o
@@ -455,6 +295,8 @@ LIB_OBJS += $(OUTPUT)util/quote.o
LIB_OBJS += $(OUTPUT)util/strbuf.o
LIB_OBJS += $(OUTPUT)util/string.o
LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
LIB_OBJS += $(OUTPUT)util/usage.o
LIB_OBJS += $(OUTPUT)util/wrapper.o
LIB_OBJS += $(OUTPUT)util/sigchain.o
@@ -469,6 +311,7 @@ LIB_OBJS += $(OUTPUT)util/map.o
LIB_OBJS += $(OUTPUT)util/pstack.o
LIB_OBJS += $(OUTPUT)util/session.o
LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
LIB_OBJS += $(OUTPUT)util/trace-event-read.o
LIB_OBJS += $(OUTPUT)util/trace-event-info.o
@@ -480,6 +323,7 @@ LIB_OBJS += $(OUTPUT)util/probe-event.o
LIB_OBJS += $(OUTPUT)util/util.o
LIB_OBJS += $(OUTPUT)util/xyarray.o
LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
@@ -514,6 +358,20 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
PERFLIBS = $(LIB_FILE)
+# Files needed for the python binding, perf.so
+# pyrf is just an internal name needed for all those wrappers.
+# This has to be in sync with what is in the 'sources' variable in
+# tools/perf/util/setup.py
+
+PYRF_OBJS += $(OUTPUT)util/cpumap.o
+PYRF_OBJS += $(OUTPUT)util/ctype.o
+PYRF_OBJS += $(OUTPUT)util/evlist.o
+PYRF_OBJS += $(OUTPUT)util/evsel.o
+PYRF_OBJS += $(OUTPUT)util/python.o
+PYRF_OBJS += $(OUTPUT)util/thread_map.o
+PYRF_OBJS += $(OUTPUT)util/util.o
+PYRF_OBJS += $(OUTPUT)util/xyarray.o
+
#
# Platform specific tweaks
#
@@ -535,22 +393,6 @@ endif # NO_DWARF
-include arch/$(ARCH)/Makefile
-ifeq ($(uname_S),Darwin)
- ifndef NO_FINK
- ifeq ($(shell test -d /sw/lib && echo y),y)
- BASIC_CFLAGS += -I/sw/include
- BASIC_LDFLAGS += -L/sw/lib
- endif
- endif
- ifndef NO_DARWIN_PORTS
- ifeq ($(shell test -d /opt/local/lib && echo y),y)
- BASIC_CFLAGS += -I/opt/local/include
- BASIC_LDFLAGS += -L/opt/local/lib
- endif
- endif
- PTHREAD_LIBS =
-endif
-
ifneq ($(OUTPUT),)
BASIC_CFLAGS += -I$(OUTPUT)
endif
@@ -595,6 +437,7 @@ else
LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o
LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o
LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o
+ LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o
LIB_OBJS += $(OUTPUT)util/ui/helpline.o
LIB_OBJS += $(OUTPUT)util/ui/progress.o
LIB_OBJS += $(OUTPUT)util/ui/util.o
@@ -604,6 +447,7 @@ else
LIB_H += util/ui/libslang.h
LIB_H += util/ui/progress.h
LIB_H += util/ui/util.h
+ LIB_H += util/ui/ui.h
endif
endif
@@ -635,12 +479,14 @@ else
PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
+ msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings)
BASIC_CFLAGS += -DNO_LIBPYTHON
else
ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
EXTLIBS += $(PYTHON_EMBED_LIBADD)
LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+ LANG_BINDINGS += $(OUTPUT)python/perf.so
endif
endif
@@ -690,201 +536,13 @@ else
endif
endif
-ifndef CC_LD_DYNPATH
- ifdef NO_R_TO_GCC_LINKER
- # Some gcc does not accept and pass -R to the linker to specify
- # the runtime dynamic library path.
- CC_LD_DYNPATH = -Wl,-rpath,
- else
- CC_LD_DYNPATH = -R
- endif
-endif
-
-ifdef NEEDS_SOCKET
- EXTLIBS += -lsocket
-endif
-ifdef NEEDS_NSL
- EXTLIBS += -lnsl
-endif
-ifdef NO_D_TYPE_IN_DIRENT
- BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT
-endif
-ifdef NO_D_INO_IN_DIRENT
- BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT
-endif
-ifdef NO_ST_BLOCKS_IN_STRUCT_STAT
- BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT
-endif
-ifdef USE_NSEC
- BASIC_CFLAGS += -DUSE_NSEC
-endif
-ifdef USE_ST_TIMESPEC
- BASIC_CFLAGS += -DUSE_ST_TIMESPEC
-endif
-ifdef NO_NSEC
- BASIC_CFLAGS += -DNO_NSEC
-endif
-ifdef NO_C99_FORMAT
- BASIC_CFLAGS += -DNO_C99_FORMAT
-endif
-ifdef SNPRINTF_RETURNS_BOGUS
- COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
- COMPAT_OBJS += $(OUTPUT)compat/snprintf.o
-endif
-ifdef FREAD_READS_DIRECTORIES
- COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
- COMPAT_OBJS += $(OUTPUT)compat/fopen.o
-endif
-ifdef NO_SYMLINK_HEAD
- BASIC_CFLAGS += -DNO_SYMLINK_HEAD
-endif
-ifdef NO_STRCASESTR
- COMPAT_CFLAGS += -DNO_STRCASESTR
- COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o
-endif
-ifdef NO_STRTOUMAX
- COMPAT_CFLAGS += -DNO_STRTOUMAX
- COMPAT_OBJS += $(OUTPUT)compat/strtoumax.o
-endif
-ifdef NO_STRTOULL
- COMPAT_CFLAGS += -DNO_STRTOULL
-endif
-ifdef NO_SETENV
- COMPAT_CFLAGS += -DNO_SETENV
- COMPAT_OBJS += $(OUTPUT)compat/setenv.o
-endif
-ifdef NO_MKDTEMP
- COMPAT_CFLAGS += -DNO_MKDTEMP
- COMPAT_OBJS += $(OUTPUT)compat/mkdtemp.o
-endif
-ifdef NO_UNSETENV
- COMPAT_CFLAGS += -DNO_UNSETENV
- COMPAT_OBJS += $(OUTPUT)compat/unsetenv.o
-endif
-ifdef NO_SYS_SELECT_H
- BASIC_CFLAGS += -DNO_SYS_SELECT_H
-endif
-ifdef NO_MMAP
- COMPAT_CFLAGS += -DNO_MMAP
- COMPAT_OBJS += $(OUTPUT)compat/mmap.o
-else
- ifdef USE_WIN32_MMAP
- COMPAT_CFLAGS += -DUSE_WIN32_MMAP
- COMPAT_OBJS += $(OUTPUT)compat/win32mmap.o
- endif
-endif
-ifdef NO_PREAD
- COMPAT_CFLAGS += -DNO_PREAD
- COMPAT_OBJS += $(OUTPUT)compat/pread.o
-endif
-ifdef NO_FAST_WORKING_DIRECTORY
- BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
-endif
-ifdef NO_TRUSTABLE_FILEMODE
- BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE
-endif
-ifdef NO_IPV6
- BASIC_CFLAGS += -DNO_IPV6
-endif
-ifdef NO_UINTMAX_T
- BASIC_CFLAGS += -Duintmax_t=uint32_t
-endif
-ifdef NO_SOCKADDR_STORAGE
-ifdef NO_IPV6
- BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in
-else
- BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6
-endif
-endif
-ifdef NO_INET_NTOP
- LIB_OBJS += $(OUTPUT)compat/inet_ntop.o
-endif
-ifdef NO_INET_PTON
- LIB_OBJS += $(OUTPUT)compat/inet_pton.o
-endif
-
-ifdef NO_ICONV
- BASIC_CFLAGS += -DNO_ICONV
-endif
-
-ifdef OLD_ICONV
- BASIC_CFLAGS += -DOLD_ICONV
-endif
-
-ifdef NO_DEFLATE_BOUND
- BASIC_CFLAGS += -DNO_DEFLATE_BOUND
-endif
-
-ifdef PPC_SHA1
- SHA1_HEADER = "ppc/sha1.h"
- LIB_OBJS += $(OUTPUT)ppc/sha1.o ppc/sha1ppc.o
-else
-ifdef ARM_SHA1
- SHA1_HEADER = "arm/sha1.h"
- LIB_OBJS += $(OUTPUT)arm/sha1.o $(OUTPUT)arm/sha1_arm.o
-else
-ifdef MOZILLA_SHA1
- SHA1_HEADER = "mozilla-sha1/sha1.h"
- LIB_OBJS += $(OUTPUT)mozilla-sha1/sha1.o
-else
- SHA1_HEADER = <openssl/sha.h>
- EXTLIBS += $(LIB_4_CRYPTO)
-endif
-endif
-endif
-ifdef NO_PERL_MAKEMAKER
- export NO_PERL_MAKEMAKER
-endif
-ifdef NO_HSTRERROR
- COMPAT_CFLAGS += -DNO_HSTRERROR
- COMPAT_OBJS += $(OUTPUT)compat/hstrerror.o
-endif
-ifdef NO_MEMMEM
- COMPAT_CFLAGS += -DNO_MEMMEM
- COMPAT_OBJS += $(OUTPUT)compat/memmem.o
-endif
-ifdef INTERNAL_QSORT
- COMPAT_CFLAGS += -DINTERNAL_QSORT
- COMPAT_OBJS += $(OUTPUT)compat/qsort.o
-endif
-ifdef RUNTIME_PREFIX
- COMPAT_CFLAGS += -DRUNTIME_PREFIX
-endif
-
-ifdef DIR_HAS_BSD_GROUP_SEMANTICS
- COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS
-endif
-ifdef NO_EXTERNAL_GREP
- BASIC_CFLAGS += -DNO_EXTERNAL_GREP
-endif
-
-ifeq ($(PERL_PATH),)
-NO_PERL=NoThanks
-endif
-
-QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
-QUIET_SUBDIR1 =
-
-ifneq ($(findstring $(MAKEFLAGS),w),w)
-PRINT_DIR = --no-print-directory
-else # "make -w"
-NO_SUBDIR = :
-endif
-
ifneq ($(findstring $(MAKEFLAGS),s),s)
ifndef V
QUIET_CC = @echo ' ' CC $@;
QUIET_AR = @echo ' ' AR $@;
QUIET_LINK = @echo ' ' LINK $@;
QUIET_MKDIR = @echo ' ' MKDIR $@;
- QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
QUIET_GEN = @echo ' ' GEN $@;
- QUIET_SUBDIR0 = +@subdir=
- QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
- $(MAKE) $(PRINT_DIR) -C $$subdir
- export V
- export QUIET_GEN
- export QUIET_BUILT_IN
endif
endif
@@ -894,7 +552,6 @@ endif
# Shell quote (do not use $(call) to accommodate ancient setups);
-SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER))
ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
@@ -908,46 +565,36 @@ htmldir_SQ = $(subst ','\'',$(htmldir))
prefix_SQ = $(subst ','\'',$(prefix))
SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH))
LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
-BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
- $(COMPAT_CFLAGS)
-LIB_OBJS += $(COMPAT_OBJS)
-
ALL_CFLAGS += $(BASIC_CFLAGS)
ALL_CFLAGS += $(ARCH_CFLAGS)
ALL_LDFLAGS += $(BASIC_LDFLAGS)
-export TAR INSTALL DESTDIR SHELL_PATH
+export INSTALL SHELL_PATH
### Build rules
SHELL = $(SHELL_PATH)
-all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
-ifneq (,$X)
- $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
-endif
-
-all::
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
please_set_SHELL_PATH_to_a_more_modern_shell:
@$$(:)
shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
-strip: $(PROGRAMS) $(OUTPUT)perf$X
- $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf$X
+strip: $(PROGRAMS) $(OUTPUT)perf
+ $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
'-DPERF_HTML_PATH="$(htmldir_SQ)"' \
$(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
-$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
$(BUILTIN_OBJS) $(LIBS) -o $@
@@ -963,39 +610,17 @@ $(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPU
'-DPERF_MAN_PATH="$(mandir_SQ)"' \
'-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-$(BUILT_INS): $(OUTPUT)perf$X
- $(QUIET_BUILT_IN)$(RM) $@ && \
- ln perf$X $@ 2>/dev/null || \
- ln -s perf$X $@ 2>/dev/null || \
- cp perf$X $@
-
$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
-$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
- $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \
- sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
- -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
- -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
- -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
- -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
- $@.sh > $(OUTPUT)$@+ && \
- chmod +x $(OUTPUT)$@+ && \
- mv $(OUTPUT)$@+ $(OUTPUT)$@
-
-configure: configure.ac
- $(QUIET_GEN)$(RM) $@ $<+ && \
- sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
- $< > $<+ && \
- autoconf -o $@ $<+ && \
- $(RM) $<+
+$(SCRIPTS) : % : %.sh
+ $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
# These can record PERF_VERSION
$(OUTPUT)perf.o perf.spec \
- $(patsubst %.sh,%,$(SCRIPT_SH)) \
- $(patsubst %.perl,%,$(SCRIPT_PERL)) \
+ $(SCRIPTS) \
: $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
@@ -1012,9 +637,6 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
'-DPREFIX="$(prefix_SQ)"' \
$<
-$(OUTPUT)builtin-init-db.o: builtin-init-db.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
-
$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
@@ -1024,6 +646,9 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS
$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
+$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
@@ -1045,12 +670,11 @@ $(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/tra
$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-$(OUTPUT)perf-%$X: %.o $(PERFLIBS)
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
-builtin-revert.o wt-status.o: wt-status.h
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
# we depend the various files onto their directories.
@@ -1063,6 +687,36 @@ $(sort $(dir $(DIRECTORY_DEPS))):
$(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+help:
+ @echo 'Perf make targets:'
+ @echo ' doc - make *all* documentation (see below)'
+ @echo ' man - make manpage documentation (access with man <foo>)'
+ @echo ' html - make html documentation'
+ @echo ' info - make GNU info documentation (access with info <foo>)'
+ @echo ' pdf - make pdf documentation'
+ @echo ' TAGS - use etags to make tag information for source browsing'
+ @echo ' tags - use ctags to make tag information for source browsing'
+ @echo ' cscope - use cscope to make interactive browsing database'
+ @echo ''
+ @echo 'Perf install targets:'
+ @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+ @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
+ @echo ' path like make prefix=/usr/local install install-doc'
+ @echo ' install - install compiled binaries'
+ @echo ' install-doc - install *all* documentation'
+ @echo ' install-man - install manpage documentation'
+ @echo ' install-html - install html documentation'
+ @echo ' install-info - install GNU info documentation'
+ @echo ' install-pdf - install pdf documentation'
+ @echo ''
+ @echo ' quick-install-doc - alias for quick-install-man'
+ @echo ' quick-install-man - install the documentation quickly'
+ @echo ' quick-install-html - install the html documentation quickly'
+ @echo ''
+ @echo 'Perf maintainer targets:'
+ @echo ' distclean - alias to clean'
+ @echo ' clean - clean all binary objects and build output'
+
doc:
$(MAKE) -C Documentation all
@@ -1101,30 +755,12 @@ $(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
fi
-# We need to apply sq twice, once to protect from the shell
-# that runs $(OUTPUT)PERF-BUILD-OPTIONS, and then again to protect it
-# and the first level quoting from the shell that runs "echo".
-$(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
- @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
- @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
- @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
- @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
-
### Testing rules
-#
-# None right now:
-#
-# TEST_PROGRAMS += test-something$X
-
-all:: $(TEST_PROGRAMS)
-
# GNU make supports exporting all variables by "export" without parameters.
# However, the environment gets quite big, and some programs have problems
# with that.
-export NO_SVN_TESTS
-
check: $(OUTPUT)common-cmds.h
if sparse; \
then \
@@ -1133,33 +769,21 @@ check: $(OUTPUT)common-cmds.h
sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
done; \
else \
- echo 2>&1 "Did you mean 'make test'?"; \
exit 1; \
fi
-remove-dashes:
- ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS)
-
### Installation rules
-ifneq ($(filter /%,$(firstword $(template_dir))),)
-template_instdir = $(template_dir)
-else
-template_instdir = $(prefix)/$(template_dir)
-endif
-export template_instdir
-
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
endif
perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
-export perfexec_instdir
install: all
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
- $(INSTALL) $(OUTPUT)perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
+ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
@@ -1172,14 +796,6 @@ install: all
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
-ifdef BUILT_INS
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
- $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifneq (,$X)
- $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) $(OUTPUT)perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
-endif
-endif
-
install-doc:
$(MAKE) -C Documentation install
@@ -1204,104 +820,17 @@ quick-install-man:
quick-install-html:
$(MAKE) -C Documentation quick-install-html
-
-### Maintainer's dist rules
-#
-# None right now
-#
-#
-# perf.spec: perf.spec.in
-# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+
-# mv $@+ $@
-#
-# PERF_TARNAME=perf-$(PERF_VERSION)
-# dist: perf.spec perf-archive$(X) configure
-# ./perf-archive --format=tar \
-# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar
-# @mkdir -p $(PERF_TARNAME)
-# @cp perf.spec configure $(PERF_TARNAME)
-# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version
-# $(TAR) rf $(PERF_TARNAME).tar \
-# $(PERF_TARNAME)/perf.spec \
-# $(PERF_TARNAME)/configure \
-# $(PERF_TARNAME)/version
-# @$(RM) -r $(PERF_TARNAME)
-# gzip -f -9 $(PERF_TARNAME).tar
-#
-# htmldocs = perf-htmldocs-$(PERF_VERSION)
-# manpages = perf-manpages-$(PERF_VERSION)
-# dist-doc:
-# $(RM) -r .doc-tmp-dir
-# mkdir .doc-tmp-dir
-# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc
-# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar .
-# gzip -n -9 -f $(htmldocs).tar
-# :
-# $(RM) -r .doc-tmp-dir
-# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7
-# $(MAKE) -C Documentation DESTDIR=./ \
-# man1dir=../.doc-tmp-dir/man1 \
-# man5dir=../.doc-tmp-dir/man5 \
-# man7dir=../.doc-tmp-dir/man7 \
-# install
-# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar .
-# gzip -n -9 -f $(manpages).tar
-# $(RM) -r .doc-tmp-dir
-#
-# rpm: dist
-# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz
-
### Cleaning rules
-distclean: clean
-# $(RM) configure
-
clean:
- $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
- $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
- $(RM) $(TEST_PROGRAMS)
+ $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
+ $(RM) $(ALL_PROGRAMS) perf
$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
- $(RM) -r autom4te.cache
- $(RM) config.log config.mak.autogen config.mak.append config.status config.cache
- $(RM) -r $(PERF_TARNAME) .doc-tmp-dir
- $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
- $(RM) $(htmldocs).tar.gz $(manpages).tar.gz
$(MAKE) -C Documentation/ clean
- $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS
+ $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
+ @python util/setup.py clean --build-lib='$(OUTPUT)python' \
+ --build-temp='$(OUTPUT)python/temp'
.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
-.PHONY: .FORCE-PERF-BUILD-OPTIONS
-
-### Make sure built-ins do not have dups and listed in perf.c
-#
-check-builtins::
- ./check-builtins.sh
-
-### Test suite coverage testing
-#
-# None right now
-#
-# .PHONY: coverage coverage-clean coverage-build coverage-report
-#
-# coverage:
-# $(MAKE) coverage-build
-# $(MAKE) coverage-report
-#
-# coverage-clean:
-# rm -f *.gcda *.gcno
-#
-# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs
-# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov
-#
-# coverage-build: coverage-clean
-# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all
-# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \
-# -j1 test
-#
-# coverage-report:
-# gcov -b *.c */*.c
-# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \
-# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \
-# | tee coverage-untested-functions
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index d9ab3ce446ac..0c7454f8b8a9 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -55,7 +55,7 @@ int bench_sched_pipe(int argc, const char **argv,
* discarding returned value of read(), write()
* causes error in building environment for perf
*/
- int ret, wait_stat;
+ int __used ret, wait_stat;
pid_t pid, retpid;
argc = parse_options(argc, argv, options,
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 8879463807e4..695de4b5ae63 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -9,6 +9,7 @@
#include "util/util.h"
+#include "util/util.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
@@ -18,6 +19,9 @@
#include "perf.h"
#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/annotate.h"
#include "util/event.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -36,9 +40,13 @@ static bool print_line;
static const char *sym_hist_filter;
-static int hists__add_entry(struct hists *self, struct addr_location *al)
+static int perf_evlist__add_sample(struct perf_evlist *evlist,
+ struct perf_sample *sample,
+ struct addr_location *al)
{
+ struct perf_evsel *evsel;
struct hist_entry *he;
+ int ret;
if (sym_hist_filter != NULL &&
(al->sym == NULL || strcmp(sym_hist_filter, al->sym->name) != 0)) {
@@ -51,25 +59,51 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
return 0;
}
- he = __hists__add_entry(self, al, NULL, 1);
+ evsel = perf_evlist__id2evsel(evlist, sample->id);
+ if (evsel == NULL) {
+ /*
+ * FIXME: Propagate this back, but at least we're in a builtin,
+ * where exit() is allowed. ;-)
+ */
+ ui__warning("Invalid %s file, contains samples with id not in "
+ "its header!\n", input_name);
+ exit_browser(0);
+ exit(1);
+ }
+
+ he = __hists__add_entry(&evsel->hists, al, NULL, 1);
if (he == NULL)
return -ENOMEM;
- return hist_entry__inc_addr_samples(he, al->addr);
+ ret = 0;
+ if (he->ms.sym != NULL) {
+ struct annotation *notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL &&
+ symbol__alloc_hist(he->ms.sym, evlist->nr_entries) < 0)
+ return -ENOMEM;
+
+ ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ }
+
+ evsel->hists.stats.total_period += sample->period;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ return ret;
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ symbol__annotate_init) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
- if (!al.filtered && hists__add_entry(&session->hists, &al)) {
+ if (!al.filtered && perf_evlist__add_sample(session->evlist, sample, &al)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
return -1;
@@ -78,261 +112,26 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
return 0;
}
-static int objdump_line__print(struct objdump_line *self,
- struct list_head *head,
- struct hist_entry *he, u64 len)
-{
- struct symbol *sym = he->ms.sym;
- static const char *prev_line;
- static const char *prev_color;
-
- if (self->offset != -1) {
- const char *path = NULL;
- unsigned int hits = 0;
- double percent = 0.0;
- const char *color;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_ext *sym_ext = priv->ext;
- struct sym_hist *h = priv->hist;
- s64 offset = self->offset;
- struct objdump_line *next = objdump__get_next_ip_line(head, self);
-
- while (offset < (s64)len &&
- (next == NULL || offset < next->offset)) {
- if (sym_ext) {
- if (path == NULL)
- path = sym_ext[offset].path;
- percent += sym_ext[offset].percent;
- } else
- hits += h->ip[offset];
-
- ++offset;
- }
-
- if (sym_ext == NULL && h->sum)
- percent = 100.0 * hits / h->sum;
-
- color = get_percent_color(percent);
-
- /*
- * Also color the filename and line if needed, with
- * the same color than the percentage. Don't print it
- * twice for close colored ip with the same filename:line
- */
- if (path) {
- if (!prev_line || strcmp(prev_line, path)
- || color != prev_color) {
- color_fprintf(stdout, color, " %s", path);
- prev_line = path;
- prev_color = color;
- }
- }
-
- color_fprintf(stdout, color, " %7.2f", percent);
- printf(" : ");
- color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
- } else {
- if (!*self->line)
- printf(" :\n");
- else
- printf(" : %s\n", self->line);
- }
-
- return 0;
-}
-
-static struct rb_root root_sym_ext;
-
-static void insert_source_line(struct sym_ext *sym_ext)
-{
- struct sym_ext *iter;
- struct rb_node **p = &root_sym_ext.rb_node;
- struct rb_node *parent = NULL;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct sym_ext, node);
-
- if (sym_ext->percent > iter->percent)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&sym_ext->node, parent, p);
- rb_insert_color(&sym_ext->node, &root_sym_ext);
-}
-
-static void free_source_line(struct hist_entry *he, int len)
-{
- struct sym_priv *priv = symbol__priv(he->ms.sym);
- struct sym_ext *sym_ext = priv->ext;
- int i;
-
- if (!sym_ext)
- return;
-
- for (i = 0; i < len; i++)
- free(sym_ext[i].path);
- free(sym_ext);
-
- priv->ext = NULL;
- root_sym_ext = RB_ROOT;
-}
-
-/* Get the filename:line for the colored entries */
-static void
-get_source_line(struct hist_entry *he, int len, const char *filename)
-{
- struct symbol *sym = he->ms.sym;
- u64 start;
- int i;
- char cmd[PATH_MAX * 2];
- struct sym_ext *sym_ext;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_hist *h = priv->hist;
-
- if (!h->sum)
- return;
-
- sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
- if (!priv->ext)
- return;
-
- start = he->ms.map->unmap_ip(he->ms.map, sym->start);
-
- for (i = 0; i < len; i++) {
- char *path = NULL;
- size_t line_len;
- u64 offset;
- FILE *fp;
-
- sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
- if (sym_ext[i].percent <= 0.5)
- continue;
-
- offset = start + i;
- sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
- fp = popen(cmd, "r");
- if (!fp)
- continue;
-
- if (getline(&path, &line_len, fp) < 0 || !line_len)
- goto next;
-
- sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
- if (!sym_ext[i].path)
- goto next;
-
- strcpy(sym_ext[i].path, path);
- insert_source_line(&sym_ext[i]);
-
- next:
- pclose(fp);
- }
-}
-
-static void print_summary(const char *filename)
-{
- struct sym_ext *sym_ext;
- struct rb_node *node;
-
- printf("\nSorted summary for file %s\n", filename);
- printf("----------------------------------------------\n\n");
-
- if (RB_EMPTY_ROOT(&root_sym_ext)) {
- printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
- return;
- }
-
- node = rb_first(&root_sym_ext);
- while (node) {
- double percent;
- const char *color;
- char *path;
-
- sym_ext = rb_entry(node, struct sym_ext, node);
- percent = sym_ext->percent;
- color = get_percent_color(percent);
- path = sym_ext->path;
-
- color_fprintf(stdout, color, " %7.2f %s", percent, path);
- node = rb_next(node);
- }
-}
-
-static void hist_entry__print_hits(struct hist_entry *self)
-{
- struct symbol *sym = self->ms.sym;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_hist *h = priv->hist;
- u64 len = sym->end - sym->start, offset;
-
- for (offset = 0; offset < len; ++offset)
- if (h->ip[offset] != 0)
- printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
- sym->start + offset, h->ip[offset]);
- printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
-}
-
-static int hist_entry__tty_annotate(struct hist_entry *he)
+static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
{
- struct map *map = he->ms.map;
- struct dso *dso = map->dso;
- struct symbol *sym = he->ms.sym;
- const char *filename = dso->long_name, *d_filename;
- u64 len;
- LIST_HEAD(head);
- struct objdump_line *pos, *n;
-
- if (hist_entry__annotate(he, &head, 0) < 0)
- return -1;
-
- if (full_paths)
- d_filename = filename;
- else
- d_filename = basename(filename);
-
- len = sym->end - sym->start;
-
- if (print_line) {
- get_source_line(he, len, filename);
- print_summary(filename);
- }
-
- printf("\n\n------------------------------------------------\n");
- printf(" Percent | Source code & Disassembly of %s\n", d_filename);
- printf("------------------------------------------------\n");
-
- if (verbose)
- hist_entry__print_hits(he);
-
- list_for_each_entry_safe(pos, n, &head, node) {
- objdump_line__print(pos, &head, he, len);
- list_del(&pos->node);
- objdump_line__free(pos);
- }
-
- if (print_line)
- free_source_line(he, len);
-
- return 0;
+ return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
+ print_line, full_paths, 0, 0);
}
-static void hists__find_annotations(struct hists *self)
+static void hists__find_annotations(struct hists *self, int evidx)
{
struct rb_node *nd = rb_first(&self->entries), *next;
int key = KEY_RIGHT;
while (nd) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
- struct sym_priv *priv;
+ struct annotation *notes;
if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
goto find_next;
- priv = symbol__priv(he->ms.sym);
- if (priv->hist == NULL) {
+ notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL) {
find_next:
if (key == KEY_LEFT)
nd = rb_prev(nd);
@@ -342,7 +141,7 @@ find_next:
}
if (use_browser > 0) {
- key = hist_entry__tui_annotate(he);
+ key = hist_entry__tui_annotate(he, evidx);
switch (key) {
case KEY_RIGHT:
next = rb_next(nd);
@@ -357,24 +156,24 @@ find_next:
if (next != NULL)
nd = next;
} else {
- hist_entry__tty_annotate(he);
+ hist_entry__tty_annotate(he, evidx);
nd = rb_next(nd);
/*
* Since we have a hist_entry per IP for the same
- * symbol, free he->ms.sym->hist to signal we already
+ * symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
- free(priv->hist);
- priv->hist = NULL;
+ free(notes->src);
+ notes->src = NULL;
}
}
}
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .fork = event__process_task,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .fork = perf_event__process_task,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
@@ -383,6 +182,8 @@ static int __cmd_annotate(void)
{
int ret;
struct perf_session *session;
+ struct perf_evsel *pos;
+ u64 total_nr_samples;
session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops);
if (session == NULL)
@@ -403,12 +204,36 @@ static int __cmd_annotate(void)
if (verbose > 2)
perf_session__fprintf_dsos(session, stdout);
- hists__collapse_resort(&session->hists);
- hists__output_resort(&session->hists);
- hists__find_annotations(&session->hists);
-out_delete:
- perf_session__delete(session);
+ total_nr_samples = 0;
+ list_for_each_entry(pos, &session->evlist->entries, node) {
+ struct hists *hists = &pos->hists;
+ u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+
+ if (nr_samples > 0) {
+ total_nr_samples += nr_samples;
+ hists__collapse_resort(hists);
+ hists__output_resort(hists);
+ hists__find_annotations(hists, pos->idx);
+ }
+ }
+ if (total_nr_samples == 0) {
+ ui__warning("The %s file has no samples!\n", input_name);
+ goto out_delete;
+ }
+out_delete:
+ /*
+ * Speed up the exit process, for large files this can
+ * take quite a while.
+ *
+ * XXX Enable this when using valgrind or if we ever
+ * librarize this command.
+ *
+ * Also experiment with obstacks to see how much speed
+ * up we'll get here.
+ *
+ * perf_session__delete(session);
+ */
return ret;
}
@@ -451,9 +276,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
else if (use_tui)
use_browser = 1;
- setup_browser();
+ setup_browser(true);
- symbol_conf.priv_size = sizeof(struct sym_priv);
+ symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = true;
if (symbol__init() < 0)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 3153e492dbcc..6b7d91160ecb 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,13 +30,13 @@ static int hists__add_entry(struct hists *self,
return -ENOMEM;
}
-static int diff__process_sample_event(event_t *event,
- struct sample_data *sample,
+static int diff__process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -56,11 +56,11 @@ static int diff__process_sample_event(event_t *event,
static struct perf_event_ops event_ops = {
.sample = diff__process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .exit = event__process_task,
- .fork = event__process_task,
- .lost = event__process_lost,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_task,
+ .fork = perf_event__process_task,
+ .lost = perf_event__process_lost,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0c78ffa7bf67..e29f04ed3396 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
static char const *input_name = "-";
static bool inject_build_ids;
-static int event__repipe_synth(event_t *event,
- struct perf_session *session __used)
+static int perf_event__repipe_synth(union perf_event *event,
+ struct perf_session *session __used)
{
uint32_t size;
void *buf = event;
@@ -36,41 +36,44 @@ static int event__repipe_synth(event_t *event,
return 0;
}
-static int event__repipe(event_t *event, struct sample_data *sample __used,
- struct perf_session *session)
+static int perf_event__repipe(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- return event__repipe_synth(event, session);
+ return perf_event__repipe_synth(event, session);
}
-static int event__repipe_mmap(event_t *self, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__repipe_mmap(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_mmap(self, sample, session);
- event__repipe(self, sample, session);
+ err = perf_event__process_mmap(event, sample, session);
+ perf_event__repipe(event, sample, session);
return err;
}
-static int event__repipe_task(event_t *self, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__repipe_task(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_task(self, sample, session);
- event__repipe(self, sample, session);
+ err = perf_event__process_task(event, sample, session);
+ perf_event__repipe(event, sample, session);
return err;
}
-static int event__repipe_tracing_data(event_t *self,
- struct perf_session *session)
+static int perf_event__repipe_tracing_data(union perf_event *event,
+ struct perf_session *session)
{
int err;
- event__repipe_synth(self, session);
- err = event__process_tracing_data(self, session);
+ perf_event__repipe_synth(event, session);
+ err = perf_event__process_tracing_data(event, session);
return err;
}
@@ -109,8 +112,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
if (self->kernel)
misc = PERF_RECORD_MISC_KERNEL;
- err = event__synthesize_build_id(self, misc, event__repipe,
- machine, session);
+ err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
+ machine, session);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", self->long_name);
return -1;
@@ -119,8 +122,9 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return 0;
}
-static int event__inject_buildid(event_t *event, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__inject_buildid(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
struct addr_location al;
struct thread *thread;
@@ -155,24 +159,24 @@ static int event__inject_buildid(event_t *event, struct sample_data *sample,
}
repipe:
- event__repipe(event, sample, session);
+ perf_event__repipe(event, sample, session);
return 0;
}
struct perf_event_ops inject_ops = {
- .sample = event__repipe,
- .mmap = event__repipe,
- .comm = event__repipe,
- .fork = event__repipe,
- .exit = event__repipe,
- .lost = event__repipe,
- .read = event__repipe,
- .throttle = event__repipe,
- .unthrottle = event__repipe,
- .attr = event__repipe_synth,
- .event_type = event__repipe_synth,
- .tracing_data = event__repipe_synth,
- .build_id = event__repipe_synth,
+ .sample = perf_event__repipe,
+ .mmap = perf_event__repipe,
+ .comm = perf_event__repipe,
+ .fork = perf_event__repipe,
+ .exit = perf_event__repipe,
+ .lost = perf_event__repipe,
+ .read = perf_event__repipe,
+ .throttle = perf_event__repipe,
+ .unthrottle = perf_event__repipe,
+ .attr = perf_event__repipe_synth,
+ .event_type = perf_event__repipe_synth,
+ .tracing_data = perf_event__repipe_synth,
+ .build_id = perf_event__repipe_synth,
};
extern volatile int session_done;
@@ -190,10 +194,10 @@ static int __cmd_inject(void)
signal(SIGINT, sig_handler);
if (inject_build_ids) {
- inject_ops.sample = event__inject_buildid;
- inject_ops.mmap = event__repipe_mmap;
- inject_ops.fork = event__repipe_task;
- inject_ops.tracing_data = event__repipe_tracing_data;
+ inject_ops.sample = perf_event__inject_buildid;
+ inject_ops.mmap = perf_event__repipe_mmap;
+ inject_ops.fork = perf_event__repipe_task;
+ inject_ops.tracing_data = perf_event__repipe_tracing_data;
}
session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index d97256d65980..7f618f4e7b79 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -275,9 +275,8 @@ static void process_free_event(void *data,
s_alloc->alloc_cpu = -1;
}
-static void
-process_raw_event(event_t *raw_event __used, void *data,
- int cpu, u64 timestamp, struct thread *thread)
+static void process_raw_event(union perf_event *raw_event __used, void *data,
+ int cpu, u64 timestamp, struct thread *thread)
{
struct event *event;
int type;
@@ -304,7 +303,8 @@ process_raw_event(event_t *raw_event __used, void *data,
}
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -325,7 +325,7 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .comm = event__process_comm,
+ .comm = perf_event__process_comm,
.ordered_samples = true,
};
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index d88c6961274c..6313b6eb3ebb 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
*/
#include "builtin.h"
@@ -13,9 +14,47 @@
#include "util/parse-events.h"
#include "util/cache.h"
-int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
+int cmd_list(int argc, const char **argv, const char *prefix __used)
{
setup_pager();
- print_events();
+
+ if (argc == 1)
+ print_events(NULL);
+ else {
+ int i;
+
+ for (i = 1; i < argc; ++i) {
+ if (i > 1)
+ putchar('\n');
+ if (strncmp(argv[i], "tracepoint", 10) == 0)
+ print_tracepoint_events(NULL, NULL);
+ else if (strcmp(argv[i], "hw") == 0 ||
+ strcmp(argv[i], "hardware") == 0)
+ print_events_type(PERF_TYPE_HARDWARE);
+ else if (strcmp(argv[i], "sw") == 0 ||
+ strcmp(argv[i], "software") == 0)
+ print_events_type(PERF_TYPE_SOFTWARE);
+ else if (strcmp(argv[i], "cache") == 0 ||
+ strcmp(argv[i], "hwcache") == 0)
+ print_hwcache_events(NULL);
+ else {
+ char *sep = strchr(argv[i], ':'), *s;
+ int sep_idx;
+
+ if (sep == NULL) {
+ print_events(argv[i]);
+ continue;
+ }
+ sep_idx = sep - argv[i];
+ s = strdup(argv[i]);
+ if (s == NULL)
+ return -1;
+
+ s[sep_idx] = '\0';
+ print_tracepoint_events(s, s + sep_idx + 1);
+ free(s);
+ }
+ }
+ }
return 0;
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2b36defc5d73..2e93f99b1480 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,14 +834,14 @@ static void dump_info(void)
die("Unknown type of information\n");
}
-static int process_sample_event(event_t *self, struct sample_data *sample,
+static int process_sample_event(union perf_event *event, struct perf_sample *sample,
struct perf_session *s)
{
struct thread *thread = perf_session__findnew(s, sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
- self->header.type);
+ event->header.type);
return -1;
}
@@ -852,7 +852,7 @@ static int process_sample_event(event_t *self, struct sample_data *sample,
static struct perf_event_ops eops = {
.sample = process_sample_event,
- .comm = event__process_comm,
+ .comm = perf_event__process_comm,
.ordered_samples = true,
};
@@ -893,7 +893,7 @@ static const char * const report_usage[] = {
static const struct option report_options[] = {
OPT_STRING('k', "key", &sort_key, "acquired",
- "key for sorting"),
+ "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
/* TODO: type */
OPT_END()
};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index add163c9f0e7..2c0e64d0b4aa 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -36,6 +36,7 @@
#include "builtin.h"
#include "util/util.h"
#include "util/strlist.h"
+#include "util/strfilter.h"
#include "util/symbol.h"
#include "util/debug.h"
#include "util/debugfs.h"
@@ -43,6 +44,8 @@
#include "util/probe-finder.h"
#include "util/probe-event.h"
+#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
+#define DEFAULT_FUNC_FILTER "!_*"
#define MAX_PATH_LEN 256
/* Session management structure */
@@ -52,6 +55,7 @@ static struct {
bool show_lines;
bool show_vars;
bool show_ext_vars;
+ bool show_funcs;
bool mod_events;
int nevents;
struct perf_probe_event events[MAX_PROBES];
@@ -59,6 +63,7 @@ static struct {
struct line_range line_range;
const char *target_module;
int max_probe_points;
+ struct strfilter *filter;
} params;
/* Parse an event definition. Note that any error must die. */
@@ -157,6 +162,27 @@ static int opt_show_vars(const struct option *opt __used,
}
#endif
+static int opt_set_filter(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ const char *err;
+
+ if (str) {
+ pr_debug2("Set filter: %s\n", str);
+ if (params.filter)
+ strfilter__delete(params.filter);
+ params.filter = strfilter__new(str, &err);
+ if (!params.filter) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static const char * const probe_usage[] = {
"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
@@ -221,6 +247,13 @@ static const struct option options[] = {
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
"Set how many probe points can be found for a probe."),
+ OPT_BOOLEAN('F', "funcs", &params.show_funcs,
+ "Show potential probe-able functions."),
+ OPT_CALLBACK('\0', "filter", NULL,
+ "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
+ "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
+ "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
+ opt_set_filter),
OPT_END()
};
@@ -246,7 +279,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
params.max_probe_points = MAX_PROBES;
if ((!params.nevents && !params.dellist && !params.list_events &&
- !params.show_lines))
+ !params.show_lines && !params.show_funcs))
usage_with_options(probe_usage, options);
/*
@@ -267,12 +300,41 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
pr_err(" Error: Don't use --list with --vars.\n");
usage_with_options(probe_usage, options);
}
+ if (params.show_funcs) {
+ pr_err(" Error: Don't use --list with --funcs.\n");
+ usage_with_options(probe_usage, options);
+ }
ret = show_perf_probe_events();
if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n",
ret);
return ret;
}
+ if (params.show_funcs) {
+ if (params.nevents != 0 || params.dellist) {
+ pr_err(" Error: Don't use --funcs with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_lines) {
+ pr_err(" Error: Don't use --funcs with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_vars) {
+ pr_err(" Error: Don't use --funcs with --vars.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (!params.filter)
+ params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
+ NULL);
+ ret = show_available_funcs(params.target_module,
+ params.filter);
+ strfilter__delete(params.filter);
+ if (ret < 0)
+ pr_err(" Error: Failed to show functions."
+ " (%d)\n", ret);
+ return ret;
+ }
#ifdef DWARF_SUPPORT
if (params.show_lines) {
@@ -297,10 +359,16 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (!params.filter)
+ params.filter = strfilter__new(DEFAULT_VAR_FILTER,
+ NULL);
+
ret = show_available_vars(params.events, params.nevents,
params.max_probe_points,
params.target_module,
+ params.filter,
params.show_ext_vars);
+ strfilter__delete(params.filter);
if (ret < 0)
pr_err(" Error: Failed to show vars. (%d)\n", ret);
return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 60cac6f92e8b..6febcc168a8c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,11 +18,13 @@
#include "util/header.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
+#include "util/thread_map.h"
#include <unistd.h>
#include <sched.h>
@@ -37,16 +39,14 @@ enum write_mode_t {
static u64 user_interval = ULLONG_MAX;
static u64 default_interval = 0;
-static u64 sample_type;
-static struct cpu_map *cpus;
static unsigned int page_size;
static unsigned int mmap_pages = 128;
static unsigned int user_freq = UINT_MAX;
static int freq = 1000;
static int output;
static int pipe_output = 0;
-static const char *output_name = "perf.data";
+static const char *output_name = NULL;
static int group = 0;
static int realtime_prio = 0;
static bool nodelay = false;
@@ -55,7 +55,6 @@ static bool sample_id_all_avail = true;
static bool system_wide = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
-static struct thread_map *threads;
static pid_t child_pid = -1;
static bool no_inherit = false;
static enum write_mode_t write_mode = WRITE_FORCE;
@@ -66,51 +65,17 @@ static bool sample_address = false;
static bool sample_time = false;
static bool no_buildid = false;
static bool no_buildid_cache = false;
+static struct perf_evlist *evsel_list;
static long samples = 0;
static u64 bytes_written = 0;
-static struct pollfd *event_array;
-
-static int nr_poll = 0;
-static int nr_cpu = 0;
-
static int file_new = 1;
static off_t post_processing_offset;
static struct perf_session *session;
static const char *cpu_list;
-struct mmap_data {
- void *base;
- unsigned int mask;
- unsigned int prev;
-};
-
-static struct mmap_data mmap_array[MAX_NR_CPUS];
-
-static unsigned long mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- long head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
-{
- struct perf_event_mmap_page *pc = md->base;
-
- /*
- * ensure all reads are done before we write the tail out.
- */
- /* mb(); */
- pc->data_tail = tail;
-}
-
static void advance_output(size_t size)
{
bytes_written += size;
@@ -131,42 +96,26 @@ static void write_output(void *buf, size_t size)
}
}
-static int process_synthesized_event(event_t *event,
- struct sample_data *sample __used,
+static int process_synthesized_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *self __used)
{
write_output(event, event->header.size);
return 0;
}
-static void mmap_read(struct mmap_data *md)
+static void mmap_read(struct perf_mmap *md)
{
- unsigned int head = mmap_read_head(md);
+ unsigned int head = perf_mmap__read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
- int diff;
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
+ if (old == head)
+ return;
- if (old != head)
- samples++;
+ samples++;
size = head - old;
@@ -185,7 +134,7 @@ static void mmap_read(struct mmap_data *md)
write_output(buf, size);
md->prev = old;
- mmap_write_tail(md, old);
+ perf_mmap__write_tail(md, old);
}
static volatile int done = 0;
@@ -209,53 +158,10 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static int group_fd;
-
-static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
-{
- struct perf_header_attr *h_attr;
-
- if (nr < session->header.attrs) {
- h_attr = session->header.attr[nr];
- } else {
- h_attr = perf_header_attr__new(a);
- if (h_attr != NULL)
- if (perf_header__add_attr(&session->header, h_attr) < 0) {
- perf_header_attr__delete(h_attr);
- h_attr = NULL;
- }
- }
-
- return h_attr;
-}
-
-static void create_counter(struct perf_evsel *evsel, int cpu)
+static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
{
- char *filter = evsel->filter;
struct perf_event_attr *attr = &evsel->attr;
- struct perf_header_attr *h_attr;
int track = !evsel->idx; /* only the first counter needs these */
- int thread_index;
- int ret;
- struct {
- u64 count;
- u64 time_enabled;
- u64 time_running;
- u64 id;
- } read_data;
- /*
- * Check if parse_single_tracepoint_event has already asked for
- * PERF_SAMPLE_TIME.
- *
- * XXX this is kludgy but short term fix for problems introduced by
- * eac23d1c that broke 'perf script' by having different sample_types
- * when using multiple tracepoint events when we use a perf binary
- * that tries to use sample_id_all on an older kernel.
- *
- * We need to move counter creation to perf_session, support
- * different sample_types, etc.
- */
- bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -263,7 +169,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (nr_counters > 1)
+ if (evlist->nr_entries > 1)
attr->sample_type |= PERF_SAMPLE_ID;
/*
@@ -315,19 +221,58 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->mmap = track;
attr->comm = track;
- attr->inherit = !no_inherit;
+
if (target_pid == -1 && target_tid == -1 && !system_wide) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
-retry_sample_id:
- attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+}
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
-try_again:
- FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
+static bool perf_evlist__equal(struct perf_evlist *evlist,
+ struct perf_evlist *other)
+{
+ struct perf_evsel *pos, *pair;
+
+ if (evlist->nr_entries != other->nr_entries)
+ return false;
+
+ pair = list_entry(other->entries.next, struct perf_evsel, node);
- if (FD(evsel, nr_cpu, thread_index) < 0) {
+ list_for_each_entry(pos, &evlist->entries, node) {
+ if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
+ return false;
+ pair = list_entry(pair->node.next, struct perf_evsel, node);
+ }
+
+ return true;
+}
+
+static void open_counters(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ struct perf_event_attr *attr = &pos->attr;
+ /*
+ * Check if parse_single_tracepoint_event has already asked for
+ * PERF_SAMPLE_TIME.
+ *
+ * XXX this is kludgy but short term fix for problems introduced by
+ * eac23d1c that broke 'perf script' by having different sample_types
+ * when using multiple tracepoint events when we use a perf binary
+ * that tries to use sample_id_all on an older kernel.
+ *
+ * We need to move counter creation to perf_session, support
+ * different sample_types, etc.
+ */
+ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
+
+ config_attr(pos, evlist);
+retry_sample_id:
+ attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+try_again:
+ if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
+ !no_inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -364,7 +309,7 @@ try_again:
}
printf("\n");
error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, nr_cpu, thread_index), strerror(err));
+ err, strerror(err));
#if defined(__i386__) || defined(__x86_64__)
if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -375,90 +320,28 @@ try_again:
#endif
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
- exit(-1);
}
+ }
- h_attr = get_header_attr(attr, evsel->idx);
- if (h_attr == NULL)
- die("nomem\n");
+ if (perf_evlist__set_filters(evlist)) {
+ error("failed to set filter with %d (%s)\n", errno,
+ strerror(errno));
+ exit(-1);
+ }
- if (!file_new) {
- if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
- fprintf(stderr, "incompatible append\n");
- exit(-1);
- }
- }
+ if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
- if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
- perror("Unable to read perf file descriptor");
+ if (file_new)
+ session->evlist = evlist;
+ else {
+ if (!perf_evlist__equal(session->evlist, evlist)) {
+ fprintf(stderr, "incompatible append\n");
exit(-1);
}
+ }
- if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
- pr_warning("Not enough memory to add id\n");
- exit(-1);
- }
-
- assert(FD(evsel, nr_cpu, thread_index) >= 0);
- fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, nr_cpu, thread_index);
-
- if (evsel->idx || thread_index) {
- struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_OUTPUT,
- FD(first, nr_cpu, 0));
- if (ret) {
- error("failed to set output: %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
- } else {
- mmap_array[nr_cpu].prev = 0;
- mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
- if (mmap_array[nr_cpu].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
-
- event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
- }
-
- if (filter != NULL) {
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_FILTER, filter);
- if (ret) {
- error("failed to set filter with %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
- }
- }
-
- if (!sample_type)
- sample_type = attr->sample_type;
-}
-
-static void open_counters(int cpu)
-{
- struct perf_evsel *pos;
-
- group_fd = -1;
-
- list_for_each_entry(pos, &evsel_list, node)
- create_counter(pos, cpu);
-
- nr_cpu++;
+ perf_session__update_sample_type(session);
}
static int process_buildids(void)
@@ -481,14 +364,14 @@ static void atexit_header(void)
if (!no_buildid)
process_buildids();
- perf_header__write(&session->header, output, true);
+ perf_session__write_header(session, evsel_list, output, true);
perf_session__delete(session);
- perf_evsel_list__delete();
+ perf_evlist__delete(evsel_list);
symbol__exit();
}
}
-static void event__synthesize_guest_os(struct machine *machine, void *data)
+static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
struct perf_session *psession = data;
@@ -504,8 +387,8 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
*method is used to avoid symbol missing when the first addr is
*in module instead of in guest kernel.
*/
- err = event__synthesize_modules(process_synthesized_event,
- psession, machine);
+ err = perf_event__synthesize_modules(process_synthesized_event,
+ psession, machine);
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -514,11 +397,12 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
* We use _stext for guest kernel because guest kernel's /proc/kallsyms
* have no _text sometimes.
*/
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ psession, machine, "_text");
if (err < 0)
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine, "_stext");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ psession, machine,
+ "_stext");
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -533,9 +417,9 @@ static void mmap_read_all(void)
{
int i;
- for (i = 0; i < nr_cpu; i++) {
- if (mmap_array[i].base)
- mmap_read(&mmap_array[i]);
+ for (i = 0; i < evsel_list->cpus->nr; i++) {
+ if (evsel_list->mmap[i].base)
+ mmap_read(&evsel_list->mmap[i]);
}
if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -566,18 +450,26 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
- if (!strcmp(output_name, "-"))
- pipe_output = 1;
- else if (!stat(output_name, &st) && st.st_size) {
- if (write_mode == WRITE_FORCE) {
- char oldname[PATH_MAX];
- snprintf(oldname, sizeof(oldname), "%s.old",
- output_name);
- unlink(oldname);
- rename(output_name, oldname);
+ if (!output_name) {
+ if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
+ pipe_output = 1;
+ else
+ output_name = "perf.data";
+ }
+ if (output_name) {
+ if (!strcmp(output_name, "-"))
+ pipe_output = 1;
+ else if (!stat(output_name, &st) && st.st_size) {
+ if (write_mode == WRITE_FORCE) {
+ char oldname[PATH_MAX];
+ snprintf(oldname, sizeof(oldname), "%s.old",
+ output_name);
+ unlink(oldname);
+ rename(output_name, oldname);
+ }
+ } else if (write_mode == WRITE_APPEND) {
+ write_mode = WRITE_FORCE;
}
- } else if (write_mode == WRITE_APPEND) {
- write_mode = WRITE_FORCE;
}
flags = O_CREAT|O_RDWR;
@@ -606,19 +498,14 @@ static int __cmd_record(int argc, const char **argv)
perf_header__set_feat(&session->header, HEADER_BUILD_ID);
if (!file_new) {
- err = perf_header__read(session, output);
+ err = perf_session__read_header(session, output);
if (err < 0)
goto out_delete_session;
}
- if (have_tracepoints(&evsel_list))
+ if (have_tracepoints(&evsel_list->entries))
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
- /*
- * perf_session__delete(session) will be called at atexit_header()
- */
- atexit(atexit_header);
-
if (forks) {
child_pid = fork();
if (child_pid < 0) {
@@ -659,7 +546,7 @@ static int __cmd_record(int argc, const char **argv)
}
if (!system_wide && target_tid == -1 && target_pid == -1)
- threads->map[0] = child_pid;
+ evsel_list->threads->map[0] = child_pid;
close(child_ready_pipe[1]);
close(go_pipe[0]);
@@ -673,46 +560,42 @@ static int __cmd_record(int argc, const char **argv)
close(child_ready_pipe[0]);
}
- if (!system_wide && no_inherit && !cpu_list) {
- open_counters(-1);
- } else {
- for (i = 0; i < cpus->nr; i++)
- open_counters(cpus->map[i]);
- }
+ open_counters(evsel_list);
- perf_session__set_sample_type(session, sample_type);
+ /*
+ * perf_session__delete(session) will be called at atexit_header()
+ */
+ atexit(atexit_header);
if (pipe_output) {
err = perf_header__write_pipe(output);
if (err < 0)
return err;
} else if (file_new) {
- err = perf_header__write(&session->header, output, false);
+ err = perf_session__write_header(session, evsel_list,
+ output, false);
if (err < 0)
return err;
}
post_processing_offset = lseek(output, 0, SEEK_CUR);
- perf_session__set_sample_id_all(session, sample_id_all_avail);
-
if (pipe_output) {
- err = event__synthesize_attrs(&session->header,
- process_synthesized_event,
- session);
+ err = perf_session__synthesize_attrs(session,
+ process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
return err;
}
- err = event__synthesize_event_types(process_synthesized_event,
- session);
+ err = perf_event__synthesize_event_types(process_synthesized_event,
+ session);
if (err < 0) {
pr_err("Couldn't synthesize event_types.\n");
return err;
}
- if (have_tracepoints(&evsel_list)) {
+ if (have_tracepoints(&evsel_list->entries)) {
/*
* FIXME err <= 0 here actually means that
* there were no tracepoints so its not really
@@ -721,9 +604,9 @@ static int __cmd_record(int argc, const char **argv)
* return this more properly and also
* propagate errors that now are calling die()
*/
- err = event__synthesize_tracing_data(output, &evsel_list,
- process_synthesized_event,
- session);
+ err = perf_event__synthesize_tracing_data(output, evsel_list,
+ process_synthesized_event,
+ session);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
return err;
@@ -738,31 +621,34 @@ static int __cmd_record(int argc, const char **argv)
return -1;
}
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ session, machine, "_text");
if (err < 0)
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_stext");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ session, machine, "_stext");
if (err < 0)
pr_err("Couldn't record kernel reference relocation symbol\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/kallsyms permission or run as root.\n");
- err = event__synthesize_modules(process_synthesized_event,
- session, machine);
+ err = perf_event__synthesize_modules(process_synthesized_event,
+ session, machine);
if (err < 0)
pr_err("Couldn't record kernel module information.\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/modules permission or run as root.\n");
if (perf_guest)
- perf_session__process_machines(session, event__synthesize_guest_os);
+ perf_session__process_machines(session,
+ perf_event__synthesize_guest_os);
if (!system_wide)
- event__synthesize_thread_map(threads, process_synthesized_event,
- session);
+ perf_event__synthesize_thread_map(evsel_list->threads,
+ process_synthesized_event,
+ session);
else
- event__synthesize_threads(process_synthesized_event, session);
+ perf_event__synthesize_threads(process_synthesized_event,
+ session);
if (realtime_prio) {
struct sched_param param;
@@ -789,17 +675,17 @@ static int __cmd_record(int argc, const char **argv)
if (hits == samples) {
if (done)
break;
- err = poll(event_array, nr_poll, -1);
+ err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
waking++;
}
if (done) {
- for (i = 0; i < nr_cpu; i++) {
+ for (i = 0; i < evsel_list->cpus->nr; i++) {
struct perf_evsel *pos;
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
for (thread = 0;
- thread < threads->nr;
+ thread < evsel_list->threads->nr;
thread++)
ioctl(FD(pos, i, thread),
PERF_EVENT_IOC_DISABLE);
@@ -838,10 +724,10 @@ static const char * const record_usage[] = {
static bool force, append_file;
const struct option record_options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
- OPT_CALLBACK(0, "filter", NULL, "filter",
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter",
"event filter", parse_filter),
OPT_INTEGER('p', "pid", &target_pid,
"record events on existing process id"),
@@ -884,6 +770,9 @@ const struct option record_options[] = {
"do not update the buildid cache"),
OPT_BOOLEAN('B', "no-buildid", &no_buildid,
"do not collect buildids in perf.data"),
+ OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+ "monitor event in cgroup name only",
+ parse_cgroups),
OPT_END()
};
@@ -892,6 +781,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
int err = -ENOMEM;
struct perf_evsel *pos;
+ evsel_list = perf_evlist__new(NULL, NULL);
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target_pid == -1 && target_tid == -1 &&
@@ -908,12 +801,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
write_mode = WRITE_FORCE;
}
+ if (nr_cgroups && !system_wide) {
+ fprintf(stderr, "cgroup monitoring only available in"
+ " system-wide mode\n");
+ usage_with_options(record_usage, record_options);
+ }
+
symbol__init();
if (no_buildid_cache || no_buildid)
disable_buildid_cache();
- if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
+ if (evsel_list->nr_entries == 0 &&
+ perf_evlist__add_default(evsel_list) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_symbol_exit;
}
@@ -921,27 +821,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (target_pid != -1)
target_tid = target_pid;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
- pr_err("Problems finding threads of monitor\n");
+ if (perf_evlist__create_maps(evsel_list, target_pid,
+ target_tid, cpu_list) < 0)
usage_with_options(record_usage, record_options);
- }
- cpus = cpu_map__new(cpu_list);
- if (cpus == NULL) {
- perror("failed to parse CPUs map");
- return -1;
- }
-
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
+ evsel_list->threads->nr) < 0)
goto out_free_fd;
if (perf_header__push_event(pos->attr.config, event_name(pos)))
goto out_free_fd;
}
- event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
- MAX_COUNTERS * threads->nr));
- if (!event_array)
+
+ if (perf_evlist__alloc_pollfd(evsel_list) < 0)
goto out_free_fd;
if (user_interval != ULLONG_MAX)
@@ -959,16 +851,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
err = -EINVAL;
- goto out_free_event_array;
+ goto out_free_fd;
}
err = __cmd_record(argc, argv);
-
-out_free_event_array:
- free(event_array);
out_free_fd:
- thread_map__delete(threads);
- threads = NULL;
+ perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
symbol__exit();
return err;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c27e31f289e6..b1b82009ab9b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -9,6 +9,7 @@
#include "util/util.h"
+#include "util/annotate.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
@@ -20,6 +21,8 @@
#include "perf.h"
#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
#include "util/header.h"
#include "util/session.h"
@@ -43,120 +46,79 @@ static const char default_pretty_printing_style[] = "normal";
static const char *pretty_printing_style = default_pretty_printing_style;
static char callchain_default_opt[] = "fractal,0.5";
+static symbol_filter_t annotate_init;
-static struct hists *perf_session__hists_findnew(struct perf_session *self,
- u64 event_stream, u32 type,
- u64 config)
-{
- struct rb_node **p = &self->hists_tree.rb_node;
- struct rb_node *parent = NULL;
- struct hists *iter, *new;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct hists, rb_node);
- if (iter->config == config)
- return iter;
-
-
- if (config > iter->config)
- p = &(*p)->rb_right;
- else
- p = &(*p)->rb_left;
- }
-
- new = malloc(sizeof(struct hists));
- if (new == NULL)
- return NULL;
- memset(new, 0, sizeof(struct hists));
- new->event_stream = event_stream;
- new->config = config;
- new->type = type;
- rb_link_node(&new->rb_node, parent, p);
- rb_insert_color(&new->rb_node, &self->hists_tree);
- return new;
-}
-
-static int perf_session__add_hist_entry(struct perf_session *self,
+static int perf_session__add_hist_entry(struct perf_session *session,
struct addr_location *al,
- struct sample_data *data)
+ struct perf_sample *sample)
{
- struct map_symbol *syms = NULL;
struct symbol *parent = NULL;
- int err = -ENOMEM;
+ int err = 0;
struct hist_entry *he;
- struct hists *hists;
- struct perf_event_attr *attr;
-
- if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) {
- syms = perf_session__resolve_callchain(self, al->thread,
- data->callchain, &parent);
- if (syms == NULL)
- return -ENOMEM;
+ struct perf_evsel *evsel;
+
+ if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
+ err = perf_session__resolve_callchain(session, al->thread,
+ sample->callchain, &parent);
+ if (err)
+ return err;
}
- attr = perf_header__find_attr(data->id, &self->header);
- if (attr)
- hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config);
- else
- hists = perf_session__hists_findnew(self, data->id, 0, 0);
- if (hists == NULL)
- goto out_free_syms;
- he = __hists__add_entry(hists, al, parent, data->period);
+ evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ if (evsel == NULL) {
+ /*
+ * FIXME: Propagate this back, but at least we're in a builtin,
+ * where exit() is allowed. ;-)
+ */
+ ui__warning("Invalid %s file, contains samples with id %" PRIu64 " not in "
+ "its header!\n", input_name, sample->id);
+ exit_browser(0);
+ exit(1);
+ }
+
+ he = __hists__add_entry(&evsel->hists, al, parent, sample->period);
if (he == NULL)
- goto out_free_syms;
- err = 0;
+ return -ENOMEM;
+
if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain, data->callchain, syms,
- data->period);
+ err = callchain_append(he->callchain, &session->callchain_cursor,
+ sample->period);
if (err)
- goto out_free_syms;
+ return err;
}
/*
* Only in the newt browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio
* code will not use it.
*/
- if (use_browser > 0)
- err = hist_entry__inc_addr_samples(he, al->addr);
-out_free_syms:
- free(syms);
- return err;
-}
+ if (al->sym != NULL && use_browser > 0) {
+ struct annotation *notes = symbol__annotation(he->ms.sym);
-static int add_event_total(struct perf_session *session,
- struct sample_data *data,
- struct perf_event_attr *attr)
-{
- struct hists *hists;
+ assert(evsel != NULL);
- if (attr)
- hists = perf_session__hists_findnew(session, data->id,
- attr->type, attr->config);
- else
- hists = perf_session__hists_findnew(session, data->id, 0, 0);
+ err = -ENOMEM;
+ if (notes->src == NULL &&
+ symbol__alloc_hist(he->ms.sym, session->evlist->nr_entries) < 0)
+ goto out;
- if (!hists)
- return -ENOMEM;
+ err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ }
- hists->stats.total_period += data->period;
- /*
- * FIXME: add_event_total should be moved from here to
- * perf_session__process_event so that the proper hist is passed to
- * the event_op methods.
- */
- hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
- session->hists.stats.total_period += data->period;
- return 0;
+ evsel->hists.stats.total_period += sample->period;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+out:
+ return err;
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
- struct perf_event_attr *attr;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ annotate_init) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -170,26 +132,17 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
return -1;
}
- attr = perf_header__find_attr(sample->id, &session->header);
-
- if (add_event_total(session, sample, attr)) {
- pr_debug("problem adding event period\n");
- return -1;
- }
-
return 0;
}
-static int process_read_event(event_t *event, struct sample_data *sample __used,
- struct perf_session *session __used)
+static int process_read_event(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct perf_event_attr *attr;
-
- attr = perf_header__find_attr(event->read.id, &session->header);
-
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist,
+ event->read.id);
if (show_threads) {
- const char *name = attr ? __event_name(attr->type, attr->config)
- : "unknown";
+ const char *name = evsel ? event_name(evsel) : "unknown";
perf_read_values_add_value(&show_threads_values,
event->read.pid, event->read.tid,
event->read.id,
@@ -198,7 +151,7 @@ static int process_read_event(event_t *event, struct sample_data *sample __used,
}
dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
- attr ? __event_name(attr->type, attr->config) : "FAIL",
+ evsel ? event_name(evsel) : "FAIL",
event->read.value);
return 0;
@@ -222,7 +175,7 @@ static int perf_session__setup_sample_type(struct perf_session *self)
} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
return -EINVAL;
@@ -233,17 +186,17 @@ static int perf_session__setup_sample_type(struct perf_session *self)
}
static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .exit = event__process_task,
- .fork = event__process_task,
- .lost = event__process_lost,
- .read = process_read_event,
- .attr = event__process_attr,
- .event_type = event__process_event_type,
- .tracing_data = event__process_tracing_data,
- .build_id = event__process_build_id,
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_task,
+ .fork = perf_event__process_task,
+ .lost = perf_event__process_lost,
+ .read = process_read_event,
+ .attr = perf_event__process_attr,
+ .event_type = perf_event__process_event_type,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
@@ -269,21 +222,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self,
return ret + fprintf(fp, "\n#\n");
}
-static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
+static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
+ const char *help)
{
- struct rb_node *next = rb_first(tree);
+ struct perf_evsel *pos;
- while (next) {
- struct hists *hists = rb_entry(next, struct hists, rb_node);
+ list_for_each_entry(pos, &evlist->entries, node) {
+ struct hists *hists = &pos->hists;
const char *evname = NULL;
if (rb_first(&hists->entries) != rb_last(&hists->entries))
- evname = __event_name(hists->type, hists->config);
+ evname = event_name(pos);
hists__fprintf_nr_sample_events(hists, evname, stdout);
hists__fprintf(hists, NULL, false, stdout);
fprintf(stdout, "\n\n");
- next = rb_next(&hists->rb_node);
}
if (sort_order == default_sort_order &&
@@ -304,8 +257,9 @@ static int hists__tty_browse_tree(struct rb_root *tree, const char *help)
static int __cmd_report(void)
{
int ret = -EINVAL;
+ u64 nr_samples;
struct perf_session *session;
- struct rb_node *next;
+ struct perf_evsel *pos;
const char *help = "For a higher level overview, try: perf report --sort comm,dso";
signal(SIGINT, sig_handler);
@@ -336,20 +290,24 @@ static int __cmd_report(void)
if (verbose > 2)
perf_session__fprintf_dsos(session, stdout);
- next = rb_first(&session->hists_tree);
- while (next) {
- struct hists *hists;
+ nr_samples = 0;
+ list_for_each_entry(pos, &session->evlist->entries, node) {
+ struct hists *hists = &pos->hists;
- hists = rb_entry(next, struct hists, rb_node);
hists__collapse_resort(hists);
hists__output_resort(hists);
- next = rb_next(&hists->rb_node);
+ nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ }
+
+ if (nr_samples == 0) {
+ ui__warning("The %s file has no samples!\n", input_name);
+ goto out_delete;
}
if (use_browser > 0)
- hists__tui_browse_tree(&session->hists_tree, help);
+ perf_evlist__tui_browse_hists(session->evlist, help);
else
- hists__tty_browse_tree(&session->hists_tree, help);
+ perf_evlist__tty_browse_hists(session->evlist, help);
out_delete:
/*
@@ -424,7 +382,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
if (tok2)
callchain_param.print_limit = strtod(tok2, &endptr);
setup:
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain params\n");
return -1;
}
@@ -498,7 +456,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
use_browser = 1;
if (strcmp(input_name, "-") != 0)
- setup_browser();
+ setup_browser(true);
else
use_browser = 0;
/*
@@ -507,7 +465,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
* implementation.
*/
if (use_browser > 0) {
- symbol_conf.priv_size = sizeof(struct sym_priv);
+ symbol_conf.priv_size = sizeof(struct annotation);
+ annotate_init = symbol__annotate_init;
/*
* For searching by name on the "Browse map details".
* providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 29acb894e035..a32f411faeac 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -369,11 +369,6 @@ static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
int ret = 0;
- u64 now;
- long long delta;
-
- now = get_nsecs();
- delta = start_time + atom->timestamp - now;
switch (atom->type) {
case SCHED_EVENT_RUN:
@@ -562,7 +557,7 @@ static void wait_for_tasks(void)
static void run_one_test(void)
{
- u64 T0, T1, delta, avg_delta, fluct, std_dev;
+ u64 T0, T1, delta, avg_delta, fluct;
T0 = get_nsecs();
wait_for_tasks();
@@ -578,7 +573,6 @@ static void run_one_test(void)
else
fluct = delta - avg_delta;
sum_fluct += fluct;
- std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
if (!run_avg)
run_avg = delta;
run_avg = (run_avg*9 + delta)/10;
@@ -799,7 +793,7 @@ replay_switch_event(struct trace_switch_event *switch_event,
u64 timestamp,
struct thread *thread __used)
{
- struct task_desc *prev, *next;
+ struct task_desc *prev, __used *next;
u64 timestamp0;
s64 delta;
@@ -1404,7 +1398,7 @@ map_switch_event(struct trace_switch_event *switch_event,
u64 timestamp,
struct thread *thread __used)
{
- struct thread *sched_out, *sched_in;
+ struct thread *sched_out __used, *sched_in;
int new_shortname;
u64 timestamp0;
s64 delta;
@@ -1580,9 +1574,9 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
event, cpu, timestamp, thread);
}
-static void
-process_raw_event(event_t *raw_event __used, struct perf_session *session,
- void *data, int cpu, u64 timestamp, struct thread *thread)
+static void process_raw_event(union perf_event *raw_event __used,
+ struct perf_session *session, void *data, int cpu,
+ u64 timestamp, struct thread *thread)
{
struct event *event;
int type;
@@ -1607,7 +1601,8 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread;
@@ -1635,9 +1630,9 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .comm = event__process_comm,
- .lost = event__process_lost,
- .fork = event__process_task,
+ .comm = perf_event__process_comm,
+ .lost = perf_event__process_lost,
+ .fork = perf_event__process_task,
.ordered_samples = true,
};
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b766c2a9ac97..5f40df635dcb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,8 @@ static int cleanup_scripting(void)
static char const *input_name = "perf.data";
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -100,14 +101,14 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
}
static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .comm = event__process_comm,
- .attr = event__process_attr,
- .event_type = event__process_event_type,
- .tracing_data = event__process_tracing_data,
- .build_id = event__process_build_id,
- .ordering_requires_timestamps = true,
+ .sample = process_sample_event,
+ .comm = perf_event__process_comm,
+ .attr = perf_event__process_attr,
+ .event_type = perf_event__process_event_type,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
.ordered_samples = true,
+ .ordering_requires_timestamps = true,
};
extern volatile int session_done;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a482a191a0ca..21c025222496 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,11 +43,13 @@
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
+#include "util/thread_map.h"
#include <sys/prctl.h>
#include <math.h>
@@ -71,8 +73,9 @@ static struct perf_event_attr default_attrs[] = {
};
+struct perf_evlist *evsel_list;
+
static bool system_wide = false;
-static struct cpu_map *cpus;
static int run_idx = 0;
static int run_count = 1;
@@ -81,7 +84,6 @@ static bool scale = true;
static bool no_aggr = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
-static struct thread_map *threads;
static pid_t child_pid = -1;
static bool null_run = false;
static bool big_num = true;
@@ -166,7 +168,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
PERF_FORMAT_TOTAL_TIME_RUNNING;
if (system_wide)
- return perf_evsel__open_per_cpu(evsel, cpus);
+ return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
attr->inherit = !no_inherit;
if (target_pid == -1 && target_tid == -1) {
@@ -174,7 +176,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->enable_on_exec = 1;
}
- return perf_evsel__open_per_thread(evsel, threads);
+ return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
}
/*
@@ -199,7 +201,8 @@ static int read_counter_aggr(struct perf_evsel *counter)
u64 *count = counter->counts->aggr.values;
int i;
- if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
+ if (__perf_evsel__read(counter, evsel_list->cpus->nr,
+ evsel_list->threads->nr, scale) < 0)
return -1;
for (i = 0; i < 3; i++)
@@ -232,7 +235,7 @@ static int read_counter(struct perf_evsel *counter)
u64 *count;
int cpu;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
return -1;
@@ -297,7 +300,7 @@ static int run_perf_stat(int argc __used, const char **argv)
}
if (target_tid == -1 && target_pid == -1 && !system_wide)
- threads->map[0] = child_pid;
+ evsel_list->threads->map[0] = child_pid;
/*
* Wait for the child to be ready to exec.
@@ -309,7 +312,7 @@ static int run_perf_stat(int argc __used, const char **argv)
close(child_ready_pipe[0]);
}
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
if (errno == -EPERM || errno == -EACCES) {
error("You may not have permission to collect %sstats.\n"
@@ -347,14 +350,15 @@ static int run_perf_stat(int argc __used, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
- perf_evsel__close_fd(counter, cpus->nr, 1);
+ perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
}
} else {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter_aggr(counter);
- perf_evsel__close_fd(counter, cpus->nr, threads->nr);
+ perf_evsel__close_fd(counter, evsel_list->cpus->nr,
+ evsel_list->threads->nr);
}
}
@@ -382,10 +386,13 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep);
+ evsel_list->cpus->map[cpu], csv_sep);
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
+ if (evsel->cgrp)
+ fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
+
if (csv_output)
return;
@@ -410,12 +417,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep);
+ evsel_list->cpus->map[cpu], csv_sep);
else
cpu = 0;
fprintf(stderr, fmt, cpustr, avg, csv_sep, event_name(evsel));
+ if (evsel->cgrp)
+ fprintf(stderr, "%s%s", csv_sep, evsel->cgrp->name);
+
if (csv_output)
return;
@@ -456,9 +466,17 @@ static void print_counter_aggr(struct perf_evsel *counter)
int scaled = counter->counts->scaled;
if (scaled == -1) {
- fprintf(stderr, "%*s%s%-24s\n",
+ fprintf(stderr, "%*s%s%*s",
csv_output ? 0 : 18,
- "<not counted>", csv_sep, event_name(counter));
+ "<not counted>",
+ csv_sep,
+ csv_output ? 0 : -24,
+ event_name(counter));
+
+ if (counter->cgrp)
+ fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
+
+ fputc('\n', stderr);
return;
}
@@ -483,7 +501,6 @@ static void print_counter_aggr(struct perf_evsel *counter)
fprintf(stderr, " (scaled from %.2f%%)",
100 * avg_running / avg_enabled);
}
-
fprintf(stderr, "\n");
}
@@ -496,19 +513,23 @@ static void print_counter(struct perf_evsel *counter)
u64 ena, run, val;
int cpu;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
val = counter->counts->cpu[cpu].val;
ena = counter->counts->cpu[cpu].ena;
run = counter->counts->cpu[cpu].run;
if (run == 0 || ena == 0) {
- fprintf(stderr, "CPU%*d%s%*s%s%-24s",
+ fprintf(stderr, "CPU%*d%s%*s%s%*s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep,
+ evsel_list->cpus->map[cpu], csv_sep,
csv_output ? 0 : 18,
"<not counted>", csv_sep,
+ csv_output ? 0 : -24,
event_name(counter));
- fprintf(stderr, "\n");
+ if (counter->cgrp)
+ fprintf(stderr, "%s%s", csv_sep, counter->cgrp->name);
+
+ fputc('\n', stderr);
continue;
}
@@ -525,7 +546,7 @@ static void print_counter(struct perf_evsel *counter)
100.0 * run / ena);
}
}
- fprintf(stderr, "\n");
+ fputc('\n', stderr);
}
}
@@ -555,10 +576,10 @@ static void print_stat(int argc, const char **argv)
}
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter);
} else {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter);
}
@@ -610,7 +631,7 @@ static int stat__set_big_num(const struct option *opt __used,
}
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
@@ -638,6 +659,9 @@ static const struct option options[] = {
"disable CPU count aggregation"),
OPT_STRING('x', "field-separator", &csv_sep, "separator",
"print counts with custom separator"),
+ OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+ "monitor event in cgroup name only",
+ parse_cgroups),
OPT_END()
};
@@ -648,6 +672,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
setlocale(LC_ALL, "");
+ evsel_list = perf_evlist__new(NULL, NULL);
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -674,49 +702,50 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (run_count <= 0)
usage_with_options(stat_usage, options);
- /* no_aggr is for system-wide only */
- if (no_aggr && !system_wide)
+ /* no_aggr, cgroup are for system-wide only */
+ if ((no_aggr || nr_cgroups) && !system_wide) {
+ fprintf(stderr, "both cgroup and no-aggregation "
+ "modes only available in system-wide mode\n");
+
usage_with_options(stat_usage, options);
+ }
/* Set attrs and nr_counters if no event is selected and !null_run */
- if (!null_run && !nr_counters) {
+ if (!null_run && !evsel_list->nr_entries) {
size_t c;
- nr_counters = ARRAY_SIZE(default_attrs);
-
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(&default_attrs[c],
- nr_counters);
+ pos = perf_evsel__new(&default_attrs[c], c);
if (pos == NULL)
goto out;
- list_add(&pos->node, &evsel_list);
+ perf_evlist__add(evsel_list, pos);
}
}
if (target_pid != -1)
target_tid = target_pid;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
+ evsel_list->threads = thread_map__new(target_pid, target_tid);
+ if (evsel_list->threads == NULL) {
pr_err("Problems finding threads of monitor\n");
usage_with_options(stat_usage, options);
}
if (system_wide)
- cpus = cpu_map__new(cpu_list);
+ evsel_list->cpus = cpu_map__new(cpu_list);
else
- cpus = cpu_map__dummy_new();
+ evsel_list->cpus = cpu_map__dummy_new();
- if (cpus == NULL) {
+ if (evsel_list->cpus == NULL) {
perror("failed to parse CPUs map");
usage_with_options(stat_usage, options);
return -1;
}
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
- perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
+ perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
goto out_free_fd;
}
@@ -741,11 +770,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (status != -1)
print_stat(argc, argv);
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
+ list_for_each_entry(pos, &evsel_list->entries, node)
perf_evsel__free_stat_priv(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete_maps(evsel_list);
out:
- thread_map__delete(threads);
- threads = NULL;
+ perf_evlist__delete(evsel_list);
return status;
}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5dcdba653d70..1b2106c58f66 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,10 +7,11 @@
#include "util/cache.h"
#include "util/debug.h"
+#include "util/evlist.h"
#include "util/parse-options.h"
-#include "util/session.h"
+#include "util/parse-events.h"
#include "util/symbol.h"
-#include "util/thread.h"
+#include "util/thread_map.h"
static long page_size;
@@ -238,14 +239,14 @@ out:
#include "util/evsel.h"
#include <sys/types.h>
-static int trace_event__id(const char *event_name)
+static int trace_event__id(const char *evname)
{
char *filename;
int err = -1, fd;
if (asprintf(&filename,
"/sys/kernel/debug/tracing/events/syscalls/%s/id",
- event_name) < 0)
+ evname) < 0)
return -1;
fd = open(filename, O_RDONLY);
@@ -289,7 +290,7 @@ static int test__open_syscall_event(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -347,9 +348,9 @@ static int test__open_syscall_event_on_all_cpus(void)
}
cpus = cpu_map__new(NULL);
- if (threads == NULL) {
- pr_debug("thread_map__new\n");
- return -1;
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_thread_map_delete;
}
@@ -364,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -408,6 +409,8 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_close_fd;
}
+ err = 0;
+
for (cpu = 0; cpu < cpus->nr; ++cpu) {
unsigned int expected;
@@ -416,18 +419,18 @@ static int test__open_syscall_event_on_all_cpus(void)
if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
pr_debug("perf_evsel__open_read_on_cpu\n");
- goto out_close_fd;
+ err = -1;
+ break;
}
expected = nr_open_calls + cpu;
if (evsel->counts->cpu[cpu].val != expected) {
pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
- goto out_close_fd;
+ err = -1;
}
}
- err = 0;
out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr);
out_evsel_delete:
@@ -437,6 +440,159 @@ out_thread_map_delete:
return err;
}
+/*
+ * This test picks a random number of calls to make to each of a few
+ * getpid-like syscalls, creates one tracepoint event per syscall, establishes
+ * an mmap for that group of events and only then makes the calls.
+ *
+ * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
+ * sample.id field to map back to its respective perf_evsel instance.
+ *
+ * Then it checks if the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls made.
+ *
+ * Returns 0 when every per-syscall count matches, -1 on any setup failure,
+ * unexpected event or count mismatch.
+ */
+static int test__basic_mmap(void)
+{
+	int err = -1;
+	union perf_event *event;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_TRACEPOINT,
+		.read_format	= PERF_FORMAT_ID,
+		.sample_type	= PERF_SAMPLE_ID,
+		.watermark	= 0,
+	};
+	cpu_set_t cpu_set;
+	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
+					"getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
+				      (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+	int ids[nsyscalls];
+	unsigned int nr_events[nsyscalls],
+		     expected_nr_events[nsyscalls], i, j;
+	/* How many leading evsels[] slots hold a pointer we created. */
+	unsigned int nr_evsels = 0;
+	struct perf_evsel *evsels[nsyscalls], *evsel;
+
+	/* Resolve the tracepoint ids and decide how many calls to make. */
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		ids[i] = trace_event__id(name);
+		if (ids[i] < 0) {
+			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+			return -1;
+		}
+		nr_events[i] = 0;
+		expected_nr_events[i] = random() % 257;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_free_threads;
+	}
+
+	/*
+	 * Pin this task to the first CPU of the map; the samples are read
+	 * back below from cpu index 0 only.
+	 */
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpus->map[0], &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+		pr_debug("sched_setaffinity() failed on CPU %d: %s\n",
+			 cpus->map[0], strerror(errno));
+		goto out_free_cpus;
+	}
+
+	evlist = perf_evlist__new(cpus, threads);
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		goto out_free_cpus;
+	}
+
+	/* anonymous union fields, can't be initialized above */
+	attr.wakeup_events = 1;
+	attr.sample_period = 1;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		attr.config = ids[i];
+		evsels[i] = perf_evsel__new(&attr, i);
+		if (evsels[i] == NULL) {
+			pr_debug("perf_evsel__new\n");
+			/* Earlier evsels are already open: close them too. */
+			goto out_close_fd;
+		}
+		nr_evsels = i + 1;
+
+		perf_evlist__add(evlist, evsels[i]);
+
+		if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+			pr_debug("failed to open counter: %s, "
+				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+				 strerror(errno));
+			goto out_close_fd;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, 128, true) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 strerror(errno));
+		goto out_close_fd;
+	}
+
+	/* Generate the workload: expected_nr_events[i] calls of syscall i. */
+	for (i = 0; i < nsyscalls; ++i)
+		for (j = 0; j < expected_nr_events[i]; ++j) {
+			/* presumably only here so the result counts as used */
+			int foo = syscalls[i]();
+			++foo;
+		}
+
+	/* Drain the buffer, attributing every sample to its evsel by id. */
+	while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
+		struct perf_sample sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE) {
+			pr_debug("unexpected %s event\n",
+				 perf_event__name(event->header.type));
+			goto out_munmap;
+		}
+
+		perf_event__parse_sample(event, attr.sample_type, false, &sample);
+		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		if (evsel == NULL) {
+			pr_debug("event with id %" PRIu64
+				 " doesn't map to an evsel\n", sample.id);
+			goto out_munmap;
+		}
+		nr_events[evsel->idx]++;
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+			pr_debug("expected %d %s events, got %d\n",
+				 expected_nr_events[evsel->idx],
+				 event_name(evsel), nr_events[evsel->idx]);
+			goto out_munmap;
+		}
+	}
+
+	err = 0;
+out_munmap:
+	perf_evlist__munmap(evlist);
+out_close_fd:
+	/*
+	 * Only the first nr_evsels slots were ever assigned; the rest of
+	 * evsels[] is uninitialized stack and must not be dereferenced.
+	 */
+	for (i = 0; i < nr_evsels; ++i)
+		perf_evsel__close_fd(evsels[i], 1, threads->nr);
+	perf_evlist__delete(evlist);
+out_free_cpus:
+	cpu_map__delete(cpus);
+out_free_threads:
+	thread_map__delete(threads);
+	return err;
+#undef nsyscalls
+}
+
+
static struct test {
const char *desc;
int (*func)(void);
@@ -454,6 +610,10 @@ static struct test {
.func = test__open_syscall_event_on_all_cpus,
},
{
+ .desc = "read samples using the mmap interface",
+ .func = test__basic_mmap,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 746cf03cb05d..67c0459dc325 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -264,9 +264,6 @@ pid_put_sample(int pid, int type, unsigned int cpu, u64 start, u64 end)
c->start_time = start;
if (p->start_time == 0 || p->start_time > start)
p->start_time = start;
-
- if (cpu > numcpus)
- numcpus = cpu;
}
#define MAX_CPUS 4096
@@ -276,21 +273,24 @@ static int cpus_cstate_state[MAX_CPUS];
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];
-static int process_comm_event(event_t *event, struct sample_data *sample __used,
+static int process_comm_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_set_comm(event->comm.tid, event->comm.comm);
return 0;
}
-static int process_fork_event(event_t *event, struct sample_data *sample __used,
+static int process_fork_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
return 0;
}
-static int process_exit_event(event_t *event, struct sample_data *sample __used,
+static int process_exit_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_exit(event->fork.pid, event->fork.time);
@@ -486,8 +486,8 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
}
-static int process_sample_event(event_t *event __used,
- struct sample_data *sample,
+static int process_sample_event(union perf_event *event __used,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct trace_entry *te;
@@ -511,6 +511,9 @@ static int process_sample_event(event_t *event __used,
if (!event_str)
return 0;
+ if (sample->cpu > numcpus)
+ numcpus = sample->cpu;
+
if (strcmp(event_str, "power:cpu_idle") == 0) {
struct power_processor_entry *ppe = (void *)te;
if (ppe->state == (u32)PWR_EVENT_EXIT)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a29d9cd9486..80c9e062bd5b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -20,11 +20,16 @@
#include "perf.h"
+#include "util/annotate.h"
+#include "util/cache.h"
#include "util/color.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
@@ -45,7 +50,6 @@
#include <errno.h>
#include <time.h>
#include <sched.h>
-#include <pthread.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
@@ -60,85 +64,42 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+static struct perf_top top = { /* shared state and defaults for 'perf top' */
+ .count_filter = 5, /* symbol table rows with fewer samples are not printed */
+ .delay_secs = 2, /* display refresh delay, in seconds */
+ .display_weighted = -1, /* non-zero: weighted display; the 'w' key flips it with ~ */
+ .target_pid = -1, /* presumably -1 means no target pid selected - confirm */
+ .target_tid = -1, /* presumably -1 means no target tid selected - confirm */
+ .active_symbols = LIST_HEAD_INIT(top.active_symbols),
+ .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER, /* guards active_symbols */
+ .active_symbols_cond = PTHREAD_COND_INITIALIZER, /* the TUI thread waits on this while the list is empty */
+ .freq = 1000, /* 1 KHz */
+};
+
static bool system_wide = false;
-static int default_interval = 0;
+static bool use_tui, use_stdio;
-static int count_filter = 5;
-static int print_entries;
+static int default_interval = 0;
-static int target_pid = -1;
-static int target_tid = -1;
-static struct thread_map *threads;
static bool inherit = false;
-static struct cpu_map *cpus;
static int realtime_prio = 0;
static bool group = false;
static unsigned int page_size;
-static unsigned int mmap_pages = 16;
-static int freq = 1000; /* 1 KHz */
+static unsigned int mmap_pages = 128;
-static int delay_secs = 2;
-static bool zero = false;
static bool dump_symtab = false;
-static bool hide_kernel_symbols = false;
-static bool hide_user_symbols = false;
static struct winsize winsize;
-/*
- * Source
- */
-
-struct source_line {
- u64 eip;
- unsigned long count[MAX_COUNTERS];
- char *line;
- struct source_line *next;
-};
-
static const char *sym_filter = NULL;
-struct sym_entry *sym_filter_entry = NULL;
struct sym_entry *sym_filter_entry_sched = NULL;
static int sym_pcnt_filter = 5;
-static int sym_counter = 0;
-static struct perf_evsel *sym_evsel = NULL;
-static int display_weighted = -1;
-static const char *cpu_list;
-
-/*
- * Symbols
- */
-
-struct sym_entry_source {
- struct source_line *source;
- struct source_line *lines;
- struct source_line **lines_tail;
- pthread_mutex_t lock;
-};
-
-struct sym_entry {
- struct rb_node rb_node;
- struct list_head node;
- unsigned long snap_count;
- double weight;
- int skip;
- u16 name_len;
- u8 origin;
- struct map *map;
- struct sym_entry_source *src;
- unsigned long count[0];
-};
/*
* Source functions
*/
-static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
-{
- return ((void *)self) + symbol_conf.priv_size;
-}
-
void get_term_dimensions(struct winsize *ws)
{
char *s = getenv("LINES");
@@ -163,10 +124,10 @@ void get_term_dimensions(struct winsize *ws)
static void update_print_entries(struct winsize *ws)
{
- print_entries = ws->ws_row;
+ top.print_entries = ws->ws_row;
- if (print_entries > 9)
- print_entries -= 9;
+ if (top.print_entries > 9)
+ top.print_entries -= 9;
}
static void sig_winch_handler(int sig __used)
@@ -178,12 +139,9 @@ static void sig_winch_handler(int sig __used)
static int parse_source(struct sym_entry *syme)
{
struct symbol *sym;
- struct sym_entry_source *source;
+ struct annotation *notes;
struct map *map;
- FILE *file;
- char command[PATH_MAX*2];
- const char *path;
- u64 len;
+ int err = -1;
if (!syme)
return -1;
@@ -194,411 +152,137 @@ static int parse_source(struct sym_entry *syme)
/*
* We can't annotate with just /proc/kallsyms
*/
- if (map->dso->origin == DSO__ORIG_KERNEL)
+ if (map->dso->origin == DSO__ORIG_KERNEL) {
+ pr_err("Can't annotate %s: No vmlinux file was found in the "
+ "path\n", sym->name);
+ sleep(1);
return -1;
-
- if (syme->src == NULL) {
- syme->src = zalloc(sizeof(*source));
- if (syme->src == NULL)
- return -1;
- pthread_mutex_init(&syme->src->lock, NULL);
}
- source = syme->src;
-
- if (source->lines) {
- pthread_mutex_lock(&source->lock);
+ notes = symbol__annotation(sym);
+ if (notes->src != NULL) {
+ pthread_mutex_lock(&notes->lock);
goto out_assign;
}
- path = map->dso->long_name;
-
- len = sym->end - sym->start;
-
- sprintf(command,
- "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
- BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
- BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
-
- file = popen(command, "r");
- if (!file)
- return -1;
-
- pthread_mutex_lock(&source->lock);
- source->lines_tail = &source->lines;
- while (!feof(file)) {
- struct source_line *src;
- size_t dummy = 0;
- char *c, *sep;
- src = malloc(sizeof(struct source_line));
- assert(src != NULL);
- memset(src, 0, sizeof(struct source_line));
+ pthread_mutex_lock(&notes->lock);
- if (getline(&src->line, &dummy, file) < 0)
- break;
- if (!src->line)
- break;
-
- c = strchr(src->line, '\n');
- if (c)
- *c = 0;
-
- src->next = NULL;
- *source->lines_tail = src;
- source->lines_tail = &src->next;
-
- src->eip = strtoull(src->line, &sep, 16);
- if (*sep == ':')
- src->eip = map__objdump_2ip(map, src->eip);
- else /* this line has no ip info (e.g. source line) */
- src->eip = 0;
+ if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
+ pthread_mutex_unlock(&notes->lock);
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ return err;
}
- pclose(file);
+
+ err = symbol__annotate(sym, syme->map, 0);
+ if (err == 0) {
out_assign:
- sym_filter_entry = syme;
- pthread_mutex_unlock(&source->lock);
- return 0;
+ top.sym_filter_entry = syme;
+ }
+
+ pthread_mutex_unlock(&notes->lock);
+ return err;
}
static void __zero_source_counters(struct sym_entry *syme)
{
- int i;
- struct source_line *line;
-
- line = syme->src->lines;
- while (line) {
- for (i = 0; i < nr_counters; i++)
- line->count[i] = 0;
- line = line->next;
- }
+ struct symbol *sym = sym_entry__symbol(syme);
+ symbol__annotate_zero_histograms(sym);
}
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
- struct source_line *line;
-
- if (syme != sym_filter_entry)
- return;
+ struct annotation *notes;
+ struct symbol *sym;
- if (pthread_mutex_trylock(&syme->src->lock))
+ if (syme != top.sym_filter_entry)
return;
- if (syme->src == NULL || syme->src->source == NULL)
- goto out_unlock;
-
- for (line = syme->src->lines; line; line = line->next) {
- /* skip lines without IP info */
- if (line->eip == 0)
- continue;
- if (line->eip == ip) {
- line->count[counter]++;
- break;
- }
- if (line->eip > ip)
- break;
- }
-out_unlock:
- pthread_mutex_unlock(&syme->src->lock);
-}
-
-#define PATTERN_LEN (BITS_PER_LONG / 4 + 2)
-
-static void lookup_sym_source(struct sym_entry *syme)
-{
- struct symbol *symbol = sym_entry__symbol(syme);
- struct source_line *line;
- char pattern[PATTERN_LEN + 1];
-
- sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
- map__rip_2objdump(syme->map, symbol->start));
-
- pthread_mutex_lock(&syme->src->lock);
- for (line = syme->src->lines; line; line = line->next) {
- if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
- syme->src->source = line;
- break;
- }
- }
- pthread_mutex_unlock(&syme->src->lock);
-}
+ sym = sym_entry__symbol(syme);
+ notes = symbol__annotation(sym);
-static void show_lines(struct source_line *queue, int count, int total)
-{
- int i;
- struct source_line *line;
+ if (pthread_mutex_trylock(&notes->lock))
+ return;
- line = queue;
- for (i = 0; i < count; i++) {
- float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
+ ip = syme->map->map_ip(syme->map, ip);
+ symbol__inc_addr_samples(sym, syme->map, counter, ip);
- printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
- line = line->next;
- }
+ pthread_mutex_unlock(&notes->lock);
}
-#define TRACE_COUNT 3
-
static void show_details(struct sym_entry *syme)
{
+ struct annotation *notes;
struct symbol *symbol;
- struct source_line *line;
- struct source_line *line_queue = NULL;
- int displayed = 0;
- int line_queue_count = 0, total = 0, more = 0;
+ int more;
if (!syme)
return;
- if (!syme->src->source)
- lookup_sym_source(syme);
-
- if (!syme->src->source)
- return;
-
symbol = sym_entry__symbol(syme);
- printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
- printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
-
- pthread_mutex_lock(&syme->src->lock);
- line = syme->src->source;
- while (line) {
- total += line->count[sym_counter];
- line = line->next;
- }
-
- line = syme->src->source;
- while (line) {
- float pcnt = 0.0;
-
- if (!line_queue_count)
- line_queue = line;
- line_queue_count++;
-
- if (line->count[sym_counter])
- pcnt = 100.0 * line->count[sym_counter] / (float)total;
- if (pcnt >= (float)sym_pcnt_filter) {
- if (displayed <= print_entries)
- show_lines(line_queue, line_queue_count, total);
- else more++;
- displayed += line_queue_count;
- line_queue_count = 0;
- line_queue = NULL;
- } else if (line_queue_count > TRACE_COUNT) {
- line_queue = line_queue->next;
- line_queue_count--;
- }
-
- line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
- line = line->next;
- }
- pthread_mutex_unlock(&syme->src->lock);
- if (more)
- printf("%d lines not displayed, maybe increase display entries [e]\n", more);
-}
+ notes = symbol__annotation(symbol);
-/*
- * Symbols will be added here in event__process_sample and will get out
- * after decayed.
- */
-static LIST_HEAD(active_symbols);
-static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * Ordering weight: count-1 * count-2 * ... / count-n
- */
-static double sym_weight(const struct sym_entry *sym)
-{
- double weight = sym->snap_count;
- int counter;
-
- if (!display_weighted)
- return weight;
+ pthread_mutex_lock(&notes->lock);
- for (counter = 1; counter < nr_counters-1; counter++)
- weight *= sym->count[counter];
+ if (notes->src == NULL)
+ goto out_unlock;
- weight /= (sym->count[counter] + 1);
+ printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
+ printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
- return weight;
+ more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
+ 0, sym_pcnt_filter, top.print_entries, 4);
+ if (top.zero)
+ symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
+ else
+ symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
+ if (more != 0)
+ printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+out_unlock:
+ pthread_mutex_unlock(&notes->lock);
}
-static long samples;
-static long kernel_samples, us_samples;
-static long exact_samples;
-static long guest_us_samples, guest_kernel_samples;
static const char CONSOLE_CLEAR[] = "";
static void __list_insert_active_sym(struct sym_entry *syme)
{
- list_add(&syme->node, &active_symbols);
-}
-
-static void list_remove_active_sym(struct sym_entry *syme)
-{
- pthread_mutex_lock(&active_symbols_lock);
- list_del_init(&syme->node);
- pthread_mutex_unlock(&active_symbols_lock);
+ list_add(&syme->node, &top.active_symbols);
}
-static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+static void print_sym_table(struct perf_session *session)
{
- struct rb_node **p = &tree->rb_node;
- struct rb_node *parent = NULL;
- struct sym_entry *iter;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct sym_entry, rb_node);
-
- if (se->weight > iter->weight)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&se->rb_node, parent, p);
- rb_insert_color(&se->rb_node, tree);
-}
-
-static void print_sym_table(void)
-{
- int printed = 0, j;
- struct perf_evsel *counter;
- int snap = !display_weighted ? sym_counter : 0;
- float samples_per_sec = samples/delay_secs;
- float ksamples_per_sec = kernel_samples/delay_secs;
- float us_samples_per_sec = (us_samples)/delay_secs;
- float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
- float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
- float esamples_percent = (100.0*exact_samples)/samples;
- float sum_ksamples = 0.0;
- struct sym_entry *syme, *n;
- struct rb_root tmp = RB_ROOT;
+ char bf[160];
+ int printed = 0;
struct rb_node *nd;
- int sym_width = 0, dso_width = 0, dso_short_width = 0;
+ struct sym_entry *syme;
+ struct rb_root tmp = RB_ROOT;
const int win_width = winsize.ws_col - 1;
-
- samples = us_samples = kernel_samples = exact_samples = 0;
- guest_kernel_samples = guest_us_samples = 0;
-
- /* Sort the active symbols */
- pthread_mutex_lock(&active_symbols_lock);
- syme = list_entry(active_symbols.next, struct sym_entry, node);
- pthread_mutex_unlock(&active_symbols_lock);
-
- list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
- syme->snap_count = syme->count[snap];
- if (syme->snap_count != 0) {
-
- if ((hide_user_symbols &&
- syme->origin == PERF_RECORD_MISC_USER) ||
- (hide_kernel_symbols &&
- syme->origin == PERF_RECORD_MISC_KERNEL)) {
- list_remove_active_sym(syme);
- continue;
- }
- syme->weight = sym_weight(syme);
- rb_insert_active_sym(&tmp, syme);
- sum_ksamples += syme->snap_count;
-
- for (j = 0; j < nr_counters; j++)
- syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
- } else
- list_remove_active_sym(syme);
- }
+ int sym_width, dso_width, dso_short_width;
+ float sum_ksamples = perf_top__decay_samples(&top, &tmp);
puts(CONSOLE_CLEAR);
- printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (!perf_guest) {
- printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
- " exact: %4.1f%% [",
- samples_per_sec,
- 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
- samples_per_sec)),
- esamples_percent);
- } else {
- printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
- " guest kernel:%4.1f%% guest us:%4.1f%%"
- " exact: %4.1f%% [",
- samples_per_sec,
- 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec -
- guest_kernel_samples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec -
- guest_us_samples_per_sec) /
- samples_per_sec)),
- esamples_percent);
- }
-
- if (nr_counters == 1 || !display_weighted) {
- struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- printf("%" PRIu64, (uint64_t)first->attr.sample_period);
- if (freq)
- printf("Hz ");
- else
- printf(" ");
- }
-
- if (!display_weighted)
- printf("%s", event_name(sym_evsel));
- else list_for_each_entry(counter, &evsel_list, node) {
- if (counter->idx)
- printf("/");
-
- printf("%s", event_name(counter));
- }
+ perf_top__header_snprintf(&top, bf, sizeof(bf));
+ printf("%s\n", bf);
- printf( "], ");
-
- if (target_pid != -1)
- printf(" (target_pid: %d", target_pid);
- else if (target_tid != -1)
- printf(" (target_tid: %d", target_tid);
- else
- printf(" (all");
-
- if (cpu_list)
- printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
- else {
- if (target_tid != -1)
- printf(")\n");
- else
- printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
- }
+ perf_top__reset_sample_counters(&top);
printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (sym_filter_entry) {
- show_details(sym_filter_entry);
- return;
+ if (session->hists.stats.total_lost != 0) {
+ color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
+ printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
+ session->hists.stats.total_lost);
}
- /*
- * Find the longest symbol name that will be displayed
- */
- for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
- syme = rb_entry(nd, struct sym_entry, rb_node);
- if (++printed > print_entries ||
- (int)syme->snap_count < count_filter)
- continue;
-
- if (syme->map->dso->long_name_len > dso_width)
- dso_width = syme->map->dso->long_name_len;
-
- if (syme->map->dso->short_name_len > dso_short_width)
- dso_short_width = syme->map->dso->short_name_len;
-
- if (syme->name_len > sym_width)
- sym_width = syme->name_len;
+ if (top.sym_filter_entry) {
+ show_details(top.sym_filter_entry);
+ return;
}
- printed = 0;
+ perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
+ &sym_width);
if (sym_width + dso_width > winsize.ws_col - 29) {
dso_width = dso_short_width;
@@ -606,7 +290,7 @@ static void print_sym_table(void)
sym_width = winsize.ws_col - dso_width - 29;
}
putchar('\n');
- if (nr_counters == 1)
+ if (top.evlist->nr_entries == 1)
printf(" samples pcnt");
else
printf(" weight samples pcnt");
@@ -615,7 +299,7 @@ static void print_sym_table(void)
printf(" RIP ");
printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
printf(" %s _______ _____",
- nr_counters == 1 ? " " : "______");
+ top.evlist->nr_entries == 1 ? " " : "______");
if (verbose)
printf(" ________________");
printf(" %-*.*s", sym_width, sym_width, graph_line);
@@ -628,13 +312,14 @@ static void print_sym_table(void)
syme = rb_entry(nd, struct sym_entry, rb_node);
sym = sym_entry__symbol(syme);
- if (++printed > print_entries || (int)syme->snap_count < count_filter)
+ if (++printed > top.print_entries ||
+ (int)syme->snap_count < top.count_filter)
continue;
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
- if (nr_counters == 1 || !display_weighted)
+ if (top.evlist->nr_entries == 1 || !top.display_weighted)
printf("%20.2f ", syme->weight);
else
printf("%9.1f %10ld ", syme->weight, syme->snap_count);
@@ -693,10 +378,8 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
/* zero counters of active symbol */
if (syme) {
- pthread_mutex_lock(&syme->src->lock);
__zero_source_counters(syme);
*target = NULL;
- pthread_mutex_unlock(&syme->src->lock);
}
fprintf(stdout, "\n%s: ", msg);
@@ -707,11 +390,11 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
if (p)
*p = 0;
- pthread_mutex_lock(&active_symbols_lock);
- syme = list_entry(active_symbols.next, struct sym_entry, node);
- pthread_mutex_unlock(&active_symbols_lock);
+ pthread_mutex_lock(&top.active_symbols_lock);
+ syme = list_entry(top.active_symbols.next, struct sym_entry, node);
+ pthread_mutex_unlock(&top.active_symbols_lock);
- list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+ list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
struct symbol *sym = sym_entry__symbol(syme);
if (!strcmp(buf, sym->name)) {
@@ -735,34 +418,34 @@ static void print_mapped_keys(void)
{
char *name = NULL;
- if (sym_filter_entry) {
- struct symbol *sym = sym_entry__symbol(sym_filter_entry);
+ if (top.sym_filter_entry) {
+ struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
name = sym->name;
}
fprintf(stdout, "\nMapped keys:\n");
- fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
- fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
+ fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs);
+ fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries);
- if (nr_counters > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel));
+ if (top.evlist->nr_entries > 1)
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel));
- fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
+ fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter);
fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
fprintf(stdout, "\t[S] stop annotation.\n");
- if (nr_counters > 1)
- fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
+ if (top.evlist->nr_entries > 1)
+ fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
fprintf(stdout,
"\t[K] hide kernel_symbols symbols. \t(%s)\n",
- hide_kernel_symbols ? "yes" : "no");
+ top.hide_kernel_symbols ? "yes" : "no");
fprintf(stdout,
"\t[U] hide user symbols. \t(%s)\n",
- hide_user_symbols ? "yes" : "no");
- fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
+ top.hide_user_symbols ? "yes" : "no");
+ fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0);
fprintf(stdout, "\t[qQ] quit.\n");
}
@@ -783,7 +466,7 @@ static int key_mapped(int c)
return 1;
case 'E':
case 'w':
- return nr_counters > 1 ? 1 : 0;
+ return top.evlist->nr_entries > 1 ? 1 : 0;
default:
break;
}
@@ -818,47 +501,47 @@ static void handle_keypress(struct perf_session *session, int c)
switch (c) {
case 'd':
- prompt_integer(&delay_secs, "Enter display delay");
- if (delay_secs < 1)
- delay_secs = 1;
+ prompt_integer(&top.delay_secs, "Enter display delay");
+ if (top.delay_secs < 1)
+ top.delay_secs = 1;
break;
case 'e':
- prompt_integer(&print_entries, "Enter display entries (lines)");
- if (print_entries == 0) {
+ prompt_integer(&top.print_entries, "Enter display entries (lines)");
+ if (top.print_entries == 0) {
sig_winch_handler(SIGWINCH);
signal(SIGWINCH, sig_winch_handler);
} else
signal(SIGWINCH, SIG_DFL);
break;
case 'E':
- if (nr_counters > 1) {
+ if (top.evlist->nr_entries > 1) {
fprintf(stderr, "\nAvailable events:");
- list_for_each_entry(sym_evsel, &evsel_list, node)
- fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
+ list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
+ fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
- prompt_integer(&sym_counter, "Enter details event counter");
+ prompt_integer(&top.sym_counter, "Enter details event counter");
- if (sym_counter >= nr_counters) {
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
- sym_counter = 0;
- fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
+ if (top.sym_counter >= top.evlist->nr_entries) {
+ top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
+ top.sym_counter = 0;
+ fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
sleep(1);
break;
}
- list_for_each_entry(sym_evsel, &evsel_list, node)
- if (sym_evsel->idx == sym_counter)
+ list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
+ if (top.sym_evsel->idx == top.sym_counter)
break;
- } else sym_counter = 0;
+ } else top.sym_counter = 0;
break;
case 'f':
- prompt_integer(&count_filter, "Enter display event count filter");
+ prompt_integer(&top.count_filter, "Enter display event count filter");
break;
case 'F':
prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
break;
case 'K':
- hide_kernel_symbols = !hide_kernel_symbols;
+ top.hide_kernel_symbols = !top.hide_kernel_symbols;
break;
case 'q':
case 'Q':
@@ -867,34 +550,50 @@ static void handle_keypress(struct perf_session *session, int c)
perf_session__fprintf_dsos(session, stderr);
exit(0);
case 's':
- prompt_symbol(&sym_filter_entry, "Enter details symbol");
+ prompt_symbol(&top.sym_filter_entry, "Enter details symbol");
break;
case 'S':
- if (!sym_filter_entry)
+ if (!top.sym_filter_entry)
break;
else {
- struct sym_entry *syme = sym_filter_entry;
+ struct sym_entry *syme = top.sym_filter_entry;
- pthread_mutex_lock(&syme->src->lock);
- sym_filter_entry = NULL;
+ top.sym_filter_entry = NULL;
__zero_source_counters(syme);
- pthread_mutex_unlock(&syme->src->lock);
}
break;
case 'U':
- hide_user_symbols = !hide_user_symbols;
+ top.hide_user_symbols = !top.hide_user_symbols;
break;
case 'w':
- display_weighted = ~display_weighted;
+ top.display_weighted = ~top.display_weighted;
break;
case 'z':
- zero = !zero;
+ top.zero = !top.zero;
break;
default:
break;
}
}
+/*
+ * TUI display thread: sleep until top.active_symbols is no longer empty,
+ * run the TUI browser, then shut the browser down and exit the process.
+ * The browser is skipped if waiting on the condition variable failed.
+ */
+static void *display_thread_tui(void *arg __used)
+{
+	int err = 0;
+
+	pthread_mutex_lock(&top.active_symbols_lock);
+	/* Give up waiting on the first pthread_cond_wait() failure. */
+	while (!err && list_empty(&top.active_symbols))
+		err = pthread_cond_wait(&top.active_symbols_cond,
+					&top.active_symbols_lock);
+	pthread_mutex_unlock(&top.active_symbols_lock);
+
+	if (err == 0)
+		perf_top__tui_browser(&top);
+
+	exit_browser(0);
+	exit(0);
+	return NULL;
+}
+
static void *display_thread(void *arg __used)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
@@ -909,13 +608,13 @@ static void *display_thread(void *arg __used)
tc.c_cc[VTIME] = 0;
repeat:
- delay_msecs = delay_secs * 1000;
+ delay_msecs = top.delay_secs * 1000;
tcsetattr(0, TCSANOW, &tc);
/* trash return*/
getc(stdin);
do {
- print_sym_table();
+ print_sym_table(session);
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
c = getc(stdin);
@@ -930,6 +629,7 @@ repeat:
/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
"default_idle",
+ "native_safe_halt",
"cpu_idle",
"enter_idle",
"exit_idle",
@@ -965,9 +665,9 @@ static int symbol_filter(struct map *map, struct symbol *sym)
syme = symbol__priv(sym);
syme->map = map;
- syme->src = NULL;
+ symbol__annotate_init(map, sym);
- if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
+ if (!top.sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
/* schedule initial sym_filter_entry setup */
sym_filter_entry_sched = syme;
sym_filter = NULL;
@@ -980,44 +680,40 @@ static int symbol_filter(struct map *map, struct symbol *sym)
}
}
- if (!syme->skip)
- syme->name_len = strlen(sym->name);
-
return 0;
}
-static void event__process_sample(const event_t *self,
- struct sample_data *sample,
- struct perf_session *session,
- struct perf_evsel *evsel)
+static void perf_event__process_sample(const union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
- u64 ip = self->ip.ip;
+ u64 ip = event->ip.ip;
struct sym_entry *syme;
struct addr_location al;
struct machine *machine;
- u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- ++samples;
+ ++top.samples;
switch (origin) {
case PERF_RECORD_MISC_USER:
- ++us_samples;
- if (hide_user_symbols)
+ ++top.us_samples;
+ if (top.hide_user_symbols)
return;
machine = perf_session__find_host_machine(session);
break;
case PERF_RECORD_MISC_KERNEL:
- ++kernel_samples;
- if (hide_kernel_symbols)
+ ++top.kernel_samples;
+ if (top.hide_kernel_symbols)
return;
machine = perf_session__find_host_machine(session);
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
- ++guest_kernel_samples;
- machine = perf_session__find_machine(session, self->ip.pid);
+ ++top.guest_kernel_samples;
+ machine = perf_session__find_machine(session, event->ip.pid);
break;
case PERF_RECORD_MISC_GUEST_USER:
- ++guest_us_samples;
+ ++top.guest_us_samples;
/*
* TODO: we don't process guest user from host side
* except simple counting.
@@ -1029,15 +725,15 @@ static void event__process_sample(const event_t *self,
if (!machine && perf_guest) {
pr_err("Can't find guest [%d]'s kernel information\n",
- self->ip.pid);
+ event->ip.pid);
return;
}
- if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
- exact_samples++;
+ if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
+ top.exact_samples++;
- if (event__preprocess_sample(self, session, &al, sample,
- symbol_filter) < 0 ||
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ symbol_filter) < 0 ||
al.filtered)
return;
@@ -1055,8 +751,9 @@ static void event__process_sample(const event_t *self,
*/
if (al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
- pr_err("The %s file can't be used\n",
- symbol_conf.vmlinux_name);
+ ui__warning("The %s file can't be used\n",
+ symbol_conf.vmlinux_name);
+ exit_browser(0);
exit(1);
}
@@ -1065,13 +762,13 @@ static void event__process_sample(const event_t *self,
/* let's see, whether we need to install initial sym_filter_entry */
if (sym_filter_entry_sched) {
- sym_filter_entry = sym_filter_entry_sched;
+ top.sym_filter_entry = sym_filter_entry_sched;
sym_filter_entry_sched = NULL;
- if (parse_source(sym_filter_entry) < 0) {
- struct symbol *sym = sym_entry__symbol(sym_filter_entry);
+ if (parse_source(top.sym_filter_entry) < 0) {
+ struct symbol *sym = sym_entry__symbol(top.sym_filter_entry);
pr_err("Can't annotate %s", sym->name);
- if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
+ if (top.sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
pr_err(": No vmlinux file was found in the path:\n");
machine__fprintf_vmlinux_path(machine, stderr);
} else
@@ -1082,166 +779,73 @@ static void event__process_sample(const event_t *self,
syme = symbol__priv(al.sym);
if (!syme->skip) {
- syme->count[evsel->idx]++;
+ struct perf_evsel *evsel;
+
syme->origin = origin;
+ evsel = perf_evlist__id2evsel(top.evlist, sample->id);
+ assert(evsel != NULL);
+ syme->count[evsel->idx]++;
record_precise_ip(syme, evsel->idx, ip);
- pthread_mutex_lock(&active_symbols_lock);
- if (list_empty(&syme->node) || !syme->node.next)
+ pthread_mutex_lock(&top.active_symbols_lock);
+ if (list_empty(&syme->node) || !syme->node.next) {
+ static bool first = true;
__list_insert_active_sym(syme);
- pthread_mutex_unlock(&active_symbols_lock);
+ if (first) {
+ pthread_cond_broadcast(&top.active_symbols_cond);
+ first = false;
+ }
+ }
+ pthread_mutex_unlock(&top.active_symbols_lock);
}
}
-struct mmap_data {
- void *base;
- int mask;
- unsigned int prev;
-};
-
-static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
- int ncpus, int nthreads)
-{
- evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
- return evsel->priv != NULL ? 0 : -ENOMEM;
-}
-
-static void perf_evsel__free_mmap(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->priv);
- evsel->priv = NULL;
-}
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- int head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void perf_session__mmap_read_counter(struct perf_session *self,
- struct perf_evsel *evsel,
- int cpu, int thread_idx)
+static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
- unsigned int head = mmap_read_head(md);
- unsigned int old = md->prev;
- unsigned char *data = md->base + page_size;
- struct sample_data sample;
- int diff;
-
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff > md->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
-
- for (; old != head;) {
- event_t *event = (event_t *)&data[old & md->mask];
-
- event_t event_copy;
+ struct perf_sample sample;
+ union perf_event *event;
- size_t size = event->header.size;
+ while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
+ perf_session__parse_sample(self, event, &sample);
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = &event_copy;
-
- do {
- cpy = min(md->mask + 1 - (offset & md->mask), len);
- memcpy(dst, &data[offset & md->mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = &event_copy;
- }
-
- event__parse_sample(event, self, &sample);
if (event->header.type == PERF_RECORD_SAMPLE)
- event__process_sample(event, &sample, self, evsel);
+ perf_event__process_sample(event, &sample, self);
else
- event__process(event, &sample, self);
- old += size;
+ perf_event__process(event, &sample, self);
}
-
- md->prev = old;
}
-static struct pollfd *event_array;
-
static void perf_session__mmap_read(struct perf_session *self)
{
- struct perf_evsel *counter;
- int i, thread_index;
-
- for (i = 0; i < cpus->nr; i++) {
- list_for_each_entry(counter, &evsel_list, node) {
- for (thread_index = 0;
- thread_index < threads->nr;
- thread_index++) {
- perf_session__mmap_read_counter(self,
- counter, i, thread_index);
- }
- }
- }
-}
+ int i;
-int nr_poll;
-int group_fd;
+ for (i = 0; i < top.evlist->cpus->nr; i++)
+ perf_session__mmap_read_cpu(self, i);
+}
-static void start_counter(int i, struct perf_evsel *evsel)
+static void start_counters(struct perf_evlist *evlist)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *mm;
- struct perf_event_attr *attr;
- int cpu = -1;
- int thread_index;
-
- if (target_tid == -1)
- cpu = cpus->map[i];
+ struct perf_evsel *counter;
- attr = &evsel->attr;
+ list_for_each_entry(counter, &evlist->entries, node) {
+ struct perf_event_attr *attr = &counter->attr;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
- }
+ if (top.freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = top.freq;
+ }
- attr->inherit = (cpu < 0) && inherit;
- attr->mmap = 1;
+ if (evlist->nr_entries > 1) {
+ attr->sample_type |= PERF_SAMPLE_ID;
+ attr->read_format |= PERF_FORMAT_ID;
+ }
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
+ attr->mmap = 1;
try_again:
- FD(evsel, i, thread_index) = sys_perf_event_open(attr,
- threads->map[thread_index], cpu, group_fd, 0);
-
- if (FD(evsel, i, thread_index) < 0) {
+ if (perf_evsel__open(counter, top.evlist->cpus,
+ top.evlist->threads, group, inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -1253,8 +857,8 @@ try_again:
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
- if (attr->type == PERF_TYPE_HARDWARE
- && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+ if (attr->type == PERF_TYPE_HARDWARE &&
+ attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
@@ -1264,39 +868,22 @@ try_again:
goto try_again;
}
printf("\n");
- error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, i, thread_index), strerror(err));
+ error("sys_perf_event_open() syscall returned with %d "
+ "(%s). /bin/dmesg may provide additional information.\n",
+ err, strerror(err));
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
}
- assert(FD(evsel, i, thread_index) >= 0);
- fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, i, thread_index);
-
- event_array[nr_poll].fd = FD(evsel, i, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mm = xyarray__entry(mmap_array, i, thread_index);
- mm->prev = 0;
- mm->mask = mmap_pages*page_size - 1;
- mm->base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
- if (mm->base == MAP_FAILED)
- die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
+
+ if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
static int __cmd_top(void)
{
pthread_t thread;
- struct perf_evsel *counter;
- int i, ret;
+ int ret __used;
/*
* FIXME: perf_session__new should allow passing a O_MMAP, so that all this
* mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
@@ -1305,23 +892,23 @@ static int __cmd_top(void)
if (session == NULL)
return -ENOMEM;
- if (target_tid != -1)
- event__synthesize_thread_map(threads, event__process, session);
+ if (top.target_tid != -1)
+ perf_event__synthesize_thread_map(top.evlist->threads,
+ perf_event__process, session);
else
- event__synthesize_threads(event__process, session);
+ perf_event__synthesize_threads(perf_event__process, session);
- for (i = 0; i < cpus->nr; i++) {
- group_fd = -1;
- list_for_each_entry(counter, &evsel_list, node)
- start_counter(i, counter);
- }
+ start_counters(top.evlist);
+ session->evlist = top.evlist;
+ perf_session__update_sample_type(session);
/* Wait for a minimal set of events before starting the snapshot */
- poll(&event_array[0], nr_poll, 100);
+ poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
perf_session__mmap_read(session);
- if (pthread_create(&thread, NULL, display_thread, session)) {
+ if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
+ display_thread), session)) {
printf("Could not create display thread.\n");
exit(-1);
}
@@ -1337,12 +924,12 @@ static int __cmd_top(void)
}
while (1) {
- int hits = samples;
+ u64 hits = top.samples;
perf_session__mmap_read(session);
- if (hits == samples)
- ret = poll(event_array, nr_poll, 100);
+ if (hits == top.samples)
+ ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
}
return 0;
@@ -1354,31 +941,31 @@ static const char * const top_usage[] = {
};
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &top.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('c', "count", &default_interval,
"event period to sample"),
- OPT_INTEGER('p', "pid", &target_pid,
+ OPT_INTEGER('p', "pid", &top.target_pid,
"profile events on existing process id"),
- OPT_INTEGER('t', "tid", &target_tid,
+ OPT_INTEGER('t', "tid", &top.target_tid,
"profile events on existing thread id"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
- OPT_STRING('C', "cpu", &cpu_list, "cpu",
+ OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
"list of cpus to monitor"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
- OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
+ OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
- OPT_INTEGER('d', "delay", &delay_secs,
+ OPT_INTEGER('d', "delay", &top.delay_secs,
"number of seconds to delay between refreshes"),
OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
"dump the symbol table used for profiling"),
- OPT_INTEGER('f', "count-filter", &count_filter,
+ OPT_INTEGER('f', "count-filter", &top.count_filter,
"only display functions with more events than this"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
@@ -1386,14 +973,16 @@ static const struct option options[] = {
"child tasks inherit counters"),
OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
"symbol to annotate"),
- OPT_BOOLEAN('z', "zero", &zero,
+ OPT_BOOLEAN('z', "zero", &top.zero,
"zero history across updates"),
- OPT_INTEGER('F', "freq", &freq,
+ OPT_INTEGER('F', "freq", &top.freq,
"profile at this frequency"),
- OPT_INTEGER('E', "entries", &print_entries,
+ OPT_INTEGER('E', "entries", &top.print_entries,
"display this many functions"),
- OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
+ OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
"hide user symbols"),
+ OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
@@ -1404,64 +993,68 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
struct perf_evsel *pos;
int status = -ENOMEM;
+ top.evlist = perf_evlist__new(NULL, NULL);
+ if (top.evlist == NULL)
+ return -ENOMEM;
+
page_size = sysconf(_SC_PAGE_SIZE);
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
usage_with_options(top_usage, options);
- if (target_pid != -1)
- target_tid = target_pid;
+ /*
+ * XXX For now start disabled, only using TUI if explicitly asked for.
+ * Change that when handle_keys equivalent gets written, live annotation
+ * done, etc.
+ */
+ use_browser = 0;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
- pr_err("Problems finding threads of monitor\n");
- usage_with_options(top_usage, options);
- }
+ if (use_stdio)
+ use_browser = 0;
+ else if (use_tui)
+ use_browser = 1;
- event_array = malloc((sizeof(struct pollfd) *
- MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
- if (!event_array)
- return -ENOMEM;
+ setup_browser(false);
/* CPU and PID are mutually exclusive */
- if (target_tid > 0 && cpu_list) {
+ if (top.target_tid > 0 && top.cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
sleep(1);
- cpu_list = NULL;
+ top.cpu_list = NULL;
}
- if (!nr_counters && perf_evsel_list__create_default() < 0) {
+ if (top.target_pid != -1)
+ top.target_tid = top.target_pid;
+
+ if (perf_evlist__create_maps(top.evlist, top.target_pid,
+ top.target_tid, top.cpu_list) < 0)
+ usage_with_options(top_usage, options);
+
+ if (!top.evlist->nr_entries &&
+ perf_evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
return -ENOMEM;
}
- if (delay_secs < 1)
- delay_secs = 1;
+ if (top.delay_secs < 1)
+ top.delay_secs = 1;
/*
* User specified count overrides default frequency.
*/
if (default_interval)
- freq = 0;
- else if (freq) {
- default_interval = freq;
+ top.freq = 0;
+ else if (top.freq) {
+ default_interval = top.freq;
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
exit(EXIT_FAILURE);
}
- if (target_tid != -1)
- cpus = cpu_map__dummy_new();
- else
- cpus = cpu_map__new(cpu_list);
-
- if (cpus == NULL)
- usage_with_options(top_usage, options);
-
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &top.evlist->entries, node) {
+ if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
+ top.evlist->threads->nr) < 0)
goto out_free_fd;
/*
* Fill in the ones not specifically initialized via -c:
@@ -1472,26 +1065,28 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
pos->attr.sample_period = default_interval;
}
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+ if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
+ perf_evlist__alloc_mmap(top.evlist) < 0)
+ goto out_free_fd;
+
+ top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
- symbol_conf.priv_size = (sizeof(struct sym_entry) +
- (nr_counters + 1) * sizeof(unsigned long));
+ symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
+ (top.evlist->nr_entries + 1) * sizeof(unsigned long));
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init() < 0)
return -1;
get_term_dimensions(&winsize);
- if (print_entries == 0) {
+ if (top.print_entries == 0) {
update_print_entries(&winsize);
signal(SIGWINCH, sig_winch_handler);
}
status = __cmd_top();
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
- perf_evsel__free_mmap(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete(top.evlist);
return status;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 95aaf565c704..a5fc660c1f12 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -94,6 +94,32 @@ void get_term_dimensions(struct winsize *ws);
#include "util/types.h"
#include <stdbool.h>
+/*
+ * Per-mapping state used by the perf_mmap__*() helpers in this header.
+ */
+struct perf_mmap {
+ void *base; /* start of the mapping; the helpers read it as a struct perf_event_mmap_page */
+ int mask; /* NOTE(review): presumably data area size - 1, for wrapping offsets -- confirm */
+ unsigned int prev; /* NOTE(review): presumably the last position consumed by the reader -- confirm */
+};
+
+/*
+ * Return the current data_head taken from the control page at mm->base.
+ * The value goes through an int local before being returned as unsigned int.
+ */
+static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->base;
+ int head = pc->data_head;
+ /* read barrier issued after data_head has been loaded, before returning it */
+ rmb();
+ return head;
+}
+
+/*
+ * Store @tail into data_tail of the control page at md->base.
+ * Note that the mb() call below is commented out, so this helper issues no
+ * barrier of its own before the store.
+ */
+static inline void perf_mmap__write_tail(struct perf_mmap *md,
+ unsigned long tail)
+{
+ struct perf_event_mmap_page *pc = md->base;
+
+ /*
+ * ensure all reads are done before we write the tail out.
+ */
+ /* mb(); */
+ pc->data_tail = tail;
+}
+
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
new file mode 100755
index 000000000000..df638c438a9f
--- /dev/null
+++ b/tools/perf/python/twatch.py
@@ -0,0 +1,41 @@
+#! /usr/bin/python
+# -*- python -*-
+# -*- coding: utf-8 -*-
+# twatch - Experimental use of the perf python interface
+# Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
+#
+# This application is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2.
+#
+# This application is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+import perf
+
+# Open one event selector on the cpu/thread maps, mmap it through an evlist,
+# then loop forever printing every event read from each cpu.
+def main():
+	cpus = perf.cpu_map()
+	threads = perf.thread_map()
+	# Each sample flag is listed once; SAMPLE_TID used to be OR-ed in twice,
+	# which had no effect on the resulting mask.
+	evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
+			   wakeup_events = 1, sample_period = 1,
+			   sample_id_all = 1,
+			   sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
+	evsel.open(cpus = cpus, threads = threads)
+	evlist = perf.evlist(cpus, threads)
+	evlist.add(evsel)
+	evlist.mmap()
+	while True:
+		# timeout = -1 is passed straight through to evlist.poll()
+		evlist.poll(timeout = -1)
+		for cpu in cpus:
+			event = evlist.read_on_cpu(cpu)
+			if not event:
+				continue
+			# Trailing comma keeps the event on the same output line.
+			print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu,
+								  event.sample_pid,
+								  event.sample_tid),
+			print event
+
+if __name__ == '__main__':
+	main()
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000000..0d0830c98cd7
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "util.h"
+#include "build-id.h"
+#include "color.h"
+#include "cache.h"
+#include "symbol.h"
+#include "debug.h"
+#include "annotate.h"
+#include <pthread.h>
+
+/*
+ * Initialise the mutex stored in the annotation area of @sym.
+ * @map is unused; the function always returns 0.
+ */
+int symbol__annotate_init(struct map *map __used, struct symbol *sym)
+{
+	pthread_mutex_init(&symbol__annotation(sym)->lock, NULL);
+
+	return 0;
+}
+
+/*
+ * Allocate the zeroed annotation source of @sym with room for @nevents
+ * histograms.  Returns 0 on success, -1 if the allocation fails.
+ */
+int symbol__alloc_hist(struct symbol *sym, int nevents)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	/* One struct sym_hist plus one u64 per address in [start, end). */
+	const size_t hist_size = sizeof(struct sym_hist) +
+				 (sym->end - sym->start) * sizeof(u64);
+
+	notes->src = zalloc(sizeof(*notes->src) + nevents * hist_size);
+	if (!notes->src)
+		return -1;
+
+	INIT_LIST_HEAD(&notes->src->source);
+	notes->src->nr_histograms = nevents;
+	notes->src->sizeof_sym_hist = hist_size;
+
+	return 0;
+}
+
+/*
+ * Clear every histogram of @sym, if any were allocated, while holding the
+ * annotation lock.
+ */
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	pthread_mutex_lock(&notes->lock);
+	if (notes->src) {
+		memset(notes->src->histograms, 0,
+		       notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+	}
+	pthread_mutex_unlock(&notes->lock);
+}
+
+/*
+ * Account one sample at @addr in histogram @evidx of @sym.
+ *
+ * Returns -ENOMEM when the symbol has no histograms allocated, 0 otherwise.
+ * Addresses outside [sym->start, sym->end) are ignored: the histogram only
+ * has slots for that range, so anything else would index outside of it.
+ */
+int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+			     int evidx, u64 addr)
+{
+	unsigned offset;
+	struct annotation *notes;
+	struct sym_hist *h;
+
+	notes = symbol__annotation(sym);
+	if (notes->src == NULL)
+		return -ENOMEM;
+
+	pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
+
+	/*
+	 * Reject both ends: without the lower bound, addr - sym->start wraps
+	 * around and the increment below lands outside the histogram.
+	 */
+	if (addr < sym->start || addr >= sym->end)
+		return 0;
+
+	offset = addr - sym->start;
+	h = annotation__histogram(notes, evidx);
+	h->sum++;
+	h->addr[offset]++;
+
+	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+		  ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
+		  addr, addr - sym->start, evidx, h->addr[offset]);
+	return 0;
+}
+
+/*
+ * Allocate an objdump_line followed by @privsize bytes and record @offset and
+ * @line in it.  Only those two fields are set; returns NULL if malloc() fails.
+ */
+static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
+{
+	struct objdump_line *oline = malloc(sizeof(*oline) + privsize);
+
+	if (oline == NULL)
+		return NULL;
+
+	oline->line = line;
+	oline->offset = offset;
+
+	return oline;
+}
+
+/*
+ * Free an objdump_line together with the line text it points to.
+ * @self is dereferenced unconditionally, so it must not be NULL.
+ */
+void objdump_line__free(struct objdump_line *self)
+{
+ free(self->line);
+ free(self);
+}
+
+/* Append @line at the tail of the list headed by @head. */
+static void objdump__add_line(struct list_head *head, struct objdump_line *line)
+{
+ list_add_tail(&line->node, head);
+}
+
+/*
+ * Starting after @pos, return the next entry on @head whose offset is not
+ * negative, or NULL when the end of the list is reached first.
+ */
+struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
+					       struct objdump_line *pos)
+{
+	list_for_each_entry_continue(pos, head, node) {
+		if (pos->offset < 0)
+			continue;
+		return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Print one objdump line of @sym to stdout.
+ *
+ * Lines with an offset other than -1 get a percentage in front of them. It is
+ * summed from notes->src->lines when that is set, otherwise derived from the
+ * hit counts of histogram @evidx, over the offsets from this line up to the
+ * next line with an offset (bounded by @len).
+ *
+ * Returns -1 when the line is filtered out (percentage below @min_pcnt, or a
+ * line without offset while @queue is set), 1 when @max_lines is non-zero
+ * and @printed has reached it, and 0 when the line was printed.
+ */
+static int objdump_line__print(struct objdump_line *oline, struct symbol *sym,
+ int evidx, u64 len, int min_pcnt,
+ int printed, int max_lines,
+ struct objdump_line *queue)
+{
+ /* static: remembered across calls to avoid repeating the same path/color */
+ static const char *prev_line;
+ static const char *prev_color;
+
+ if (oline->offset != -1) {
+ const char *path = NULL;
+ unsigned int hits = 0;
+ double percent = 0.0;
+ const char *color;
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_line *src_line = notes->src->lines;
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ s64 offset = oline->offset;
+ struct objdump_line *next;
+
+ next = objdump__get_next_ip_line(&notes->src->source, oline);
+
+ /* accumulate over every offset that belongs to this line */
+ while (offset < (s64)len &&
+ (next == NULL || offset < next->offset)) {
+ if (src_line) {
+ if (path == NULL)
+ path = src_line[offset].path;
+ percent += src_line[offset].percent;
+ } else
+ hits += h->addr[offset];
+
+ ++offset;
+ }
+
+ if (src_line == NULL && h->sum)
+ percent = 100.0 * hits / h->sum;
+
+ if (percent < min_pcnt)
+ return -1;
+
+ if (max_lines && printed >= max_lines)
+ return 1;
+
+ /*
+ * Print the queued lines that precede this one first; the
+ * recursive calls pass min_pcnt 0 and no queue of their own.
+ */
+ if (queue != NULL) {
+ list_for_each_entry_from(queue, &notes->src->source, node) {
+ if (queue == oline)
+ break;
+ objdump_line__print(queue, sym, evidx, len,
+ 0, 0, 1, NULL);
+ }
+ }
+
+ color = get_percent_color(percent);
+
+ /*
+ * Also color the filename and line if needed, with
+ * the same color than the percentage. Don't print it
+ * twice for close colored addr with the same filename:line
+ */
+ if (path) {
+ if (!prev_line || strcmp(prev_line, path)
+ || color != prev_color) {
+ color_fprintf(stdout, color, " %s", path);
+ prev_line = path;
+ prev_color = color;
+ }
+ }
+
+ color_fprintf(stdout, color, " %7.2f", percent);
+ printf(" : ");
+ color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line);
+ } else if (max_lines && printed >= max_lines)
+ return 1;
+ else {
+ /* lines without an offset are not printed while a queue is active */
+ if (queue)
+ return -1;
+
+ if (!*oline->line)
+ printf(" :\n");
+ else
+ printf(" : %s\n", oline->line);
+ }
+
+ return 0;
+}
+
+/*
+ * Read one line of objdump output from @file and append it to the source
+ * list of @sym as an objdump_line with @privsize bytes of private area.
+ *
+ * Trailing whitespace is stripped.  If the line starts with a hexadecimal
+ * address followed by ':' and more text, and that address lies within the
+ * symbol, the line's offset is the distance from the symbol start;
+ * otherwise the offset is -1.
+ *
+ * Returns 0 on success, -1 on end of file, read error or allocation failure.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
+				      FILE *file, size_t privsize)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct objdump_line *objdump_line;
+	char *line = NULL, *tmp, *tmp2;
+	size_t line_cap = 0;
+	ssize_t line_len;
+	s64 line_ip, offset = -1;
+
+	/*
+	 * getline() stores the buffer capacity in line_cap and returns the
+	 * number of characters read; only the return value is a length.
+	 * The buffer may have been allocated even when the call fails.
+	 */
+	line_len = getline(&line, &line_cap, file);
+	if (line_len < 0) {
+		free(line);
+		return -1;
+	}
+
+	/* Strip trailing whitespace, including the newline. */
+	while (line_len > 0 && isspace((unsigned char)line[line_len - 1]))
+		line[--line_len] = '\0';
+
+	line_ip = -1;
+
+	/*
+	 * Strip leading spaces:
+	 */
+	tmp = line;
+	while (*tmp) {
+		if (*tmp != ' ')
+			break;
+		tmp++;
+	}
+
+	if (*tmp) {
+		/*
+		 * Parse hexa addresses followed by ':'
+		 */
+		line_ip = strtoull(tmp, &tmp2, 16);
+		if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
+			line_ip = -1;
+	}
+
+	if (line_ip != -1) {
+		u64 start = map__rip_2objdump(map, sym->start),
+		    end = map__rip_2objdump(map, sym->end);
+
+		offset = line_ip - start;
+		if (offset < 0 || (u64)line_ip > end)
+			offset = -1;
+	}
+
+	/* On success the new objdump_line owns the line buffer. */
+	objdump_line = objdump_line__new(offset, line, privsize);
+	if (objdump_line == NULL) {
+		free(line);
+		return -1;
+	}
+	objdump__add_line(&notes->src->source, objdump_line);
+
+	return 0;
+}
+
+/*
+ * Disassemble @sym with objdump and store the resulting lines on the
+ * symbol's annotation source list, each with @privsize bytes of private area.
+ *
+ * The file handed to objdump is the build-id cache entry when it exists, is
+ * a readable symlink and is not a kallsyms entry; otherwise dso->long_name
+ * is used.  Both are prefixed with symbol_conf.symfs.
+ *
+ * Returns -ENOMEM when the build-id file name cannot be built for a DSO that
+ * has a build id, -ENOENT the first time a DSO__ORIG_KERNEL DSO is hit (no
+ * vmlinux to disassemble), and 0 otherwise -- including when popen() fails.
+ */
+int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
+{
+	struct dso *dso = map->dso;
+	char *filename = dso__build_id_filename(dso, NULL, 0);
+	bool free_filename = true;
+	char command[PATH_MAX * 2];
+	FILE *file;
+	int err = 0;
+	char symfs_filename[PATH_MAX];
+	ssize_t link_len;
+
+	if (filename == NULL) {
+		if (dso->has_build_id) {
+			pr_err("Can't annotate %s: not enough memory\n",
+			       sym->name);
+			return -ENOMEM;
+		}
+		goto fallback;
+	}
+
+	snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+		 symbol_conf.symfs, filename);
+
+	/*
+	 * readlink() does not NUL-terminate its output, so leave room for the
+	 * terminator and add it before the buffer is searched with strstr().
+	 */
+	link_len = readlink(symfs_filename, command, sizeof(command) - 1);
+	if (link_len >= 0)
+		command[link_len] = '\0';
+
+	if (link_len < 0 ||
+	    strstr(command, "[kernel.kallsyms]") ||
+	    access(symfs_filename, R_OK)) {
+		free(filename);
+fallback:
+		/*
+		 * If we don't have build-ids or the build-id file isn't in the
+		 * cache, or is just a kallsyms file, well, lets hope that this
+		 * DSO is the same as when 'perf record' ran.
+		 */
+		filename = dso->long_name;
+		snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+			 symbol_conf.symfs, filename);
+		free_filename = false;
+	}
+
+	if (dso->origin == DSO__ORIG_KERNEL) {
+		char bf[BUILD_ID_SIZE * 2 + 16] = " with build id ";
+		char *build_id_msg = NULL;
+
+		/* Only complain once per DSO. */
+		if (dso->annotate_warned)
+			goto out_free_filename;
+
+		if (dso->has_build_id) {
+			/* bf + 15 is just past the " with build id " prefix */
+			build_id__sprintf(dso->build_id,
+					  sizeof(dso->build_id), bf + 15);
+			build_id_msg = bf;
+		}
+		err = -ENOENT;
+		dso->annotate_warned = 1;
+		pr_err("Can't annotate %s: No vmlinux file%s was found in the "
+		       "path.\nPlease use 'perf buildid-cache -av vmlinux' or "
+		       "--vmlinux vmlinux.\n",
+		       sym->name, build_id_msg ?: "");
+		goto out_free_filename;
+	}
+
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+		 filename, sym->name, map->unmap_ip(map, sym->start),
+		 map->unmap_ip(map, sym->end));
+
+	pr_debug("annotating [%p] %30s : [%p] %30s\n",
+		 dso, dso->long_name, sym, sym->name);
+
+	snprintf(command, sizeof(command),
+		 "objdump --start-address=0x%016" PRIx64
+		 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
+		 map__rip_2objdump(map, sym->start),
+		 map__rip_2objdump(map, sym->end),
+		 symfs_filename, filename);
+
+	pr_debug("Executing: %s\n", command);
+
+	file = popen(command, "r");
+	if (!file)
+		goto out_free_filename;
+
+	while (!feof(file))
+		if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
+			break;
+
+	pclose(file);
+out_free_filename:
+	if (free_filename)
+		free(filename);
+	return err;
+}
+
+/*
+ * Insert @src_line into the rb-tree @root, keyed on percent.  Entries with a
+ * strictly larger percent go to the left; everything else goes to the right.
+ */
+static void insert_source_line(struct rb_root *root, struct source_line *src_line)
+{
+	struct rb_node **link = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct source_line *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct source_line, node);
+		link = (src_line->percent > cur->percent) ? &parent->rb_left
+							  : &parent->rb_right;
+	}
+
+	rb_link_node(&src_line->node, parent, link);
+	rb_insert_color(&src_line->node, root);
+}
+
+/*
+ * Free the @len entries of the symbol's source-line array, including each
+ * entry's path, and reset the pointer stored in the annotation.
+ */
+static void symbol__free_source_line(struct symbol *sym, int len)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct source_line *lines = notes->src->lines;
+	int idx = 0;
+
+	while (idx < len) {
+		free(lines[idx].path);
+		idx++;
+	}
+
+	free(lines);
+	notes->src->lines = NULL;
+}
+
+/*
+ * Get the filename:line for the colored entries
+ *
+ * Allocates notes->src->lines with @len entries (one per byte offset inside
+ * the symbol), computes the percentage of hits for each offset from the
+ * @evidx histogram and, for every offset above 0.5%, asks addr2line for the
+ * source location in @filename.  Entries that got a location are inserted
+ * into the rbtree at @root.
+ *
+ * Returns 0 on success (including when the histogram has no samples, in
+ * which case nothing is allocated) and -1 if the table can't be allocated.
+ * Failures to run addr2line or to read its output only skip that entry.
+ */
+static int symbol__get_source_line(struct symbol *sym, struct map *map,
+				    int evidx, struct rb_root *root, int len,
+				    const char *filename)
+{
+	u64 start;
+	int i;
+	char cmd[PATH_MAX * 2];
+	struct source_line *src_line;
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+
+	if (!h->sum)
+		return 0;
+
+	src_line = notes->src->lines = calloc(len, sizeof(struct source_line));
+	if (!notes->src->lines)
+		return -1;
+
+	start = map->unmap_ip(map, sym->start);
+
+	for (i = 0; i < len; i++) {
+		char *path = NULL;
+		size_t path_size = 0;
+		u64 offset;
+		FILE *fp;
+
+		src_line[i].percent = 100.0 * h->addr[i] / h->sum;
+		if (src_line[i].percent <= 0.5)
+			continue;
+
+		offset = start + i;
+		/* Bounded: @filename is caller supplied and may be long */
+		snprintf(cmd, sizeof(cmd), "addr2line -e %s %016" PRIx64,
+			 filename, offset);
+		fp = popen(cmd, "r");
+		if (!fp)
+			continue;
+
+		/*
+		 * getline() returns the number of characters read; the size
+		 * argument is only the capacity of the buffer it allocated.
+		 */
+		if (getline(&path, &path_size, fp) <= 0)
+			goto next;
+
+		/*
+		 * Hand the getline() buffer over to the table instead of
+		 * copying it; symbol__free_source_line() releases it with
+		 * free().
+		 */
+		src_line[i].path = path;
+		path = NULL;
+		insert_source_line(root, &src_line[i]);
+
+	next:
+		/* getline() may have allocated a buffer even when it failed */
+		free(path);
+		pclose(fp);
+	}
+
+	return 0;
+}
+
+/*
+ * Print the source line tree at @root, one "percent path" entry per node,
+ * walking it from rb_first() onwards.  @filename only appears in the header.
+ * An empty tree is reported as having nothing above MIN_GREEN.
+ */
+static void print_summary(struct rb_root *root, const char *filename)
+{
+	struct rb_node *nd;
+
+	printf("\nSorted summary for file %s\n", filename);
+	printf("----------------------------------------------\n\n");
+
+	if (RB_EMPTY_ROOT(root)) {
+		printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+		return;
+	}
+
+	for (nd = rb_first(root); nd != NULL; nd = rb_next(nd)) {
+		struct source_line *entry = rb_entry(nd, struct source_line, node);
+		const char *color = get_percent_color(entry->percent);
+
+		/*
+		 * NOTE(review): no "\n" in the format, presumably because
+		 * entry->path still ends with the newline read from addr2line.
+		 */
+		color_fprintf(stdout, color, " %7.2f %s",
+			      entry->percent, entry->path);
+	}
+}
+
+/*
+ * Dump the raw @evidx histogram of @sym: one "address: count" line for every
+ * offset that has at least one hit, followed by the histogram total.
+ */
+static void symbol__annotate_hits(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+	u64 size = sym->end - sym->start;
+	u64 off;
+
+	for (off = 0; off < size; ++off) {
+		if (h->addr[off] == 0)
+			continue;
+
+		printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+		       sym->start + off, h->addr[off]);
+	}
+
+	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
+}
+
+/*
+ * Print the annotated disassembly of @sym for the @evidx histogram.
+ *
+ * @full_paths selects between the dso long name and its basename in the
+ * header.  @min_pcnt, @max_lines and the current queue are handed to
+ * objdump_line__print(), which decides per line whether it is printed (0),
+ * dropped because of @max_lines (1) or filtered out (anything else).
+ *
+ * When @context is non zero, up to @context filtered lines preceding a
+ * printed one are kept in a sliding window (queue/queue_len).
+ *
+ * Returns the number of lines dropped because of @max_lines.
+ */
+int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
+			    bool full_paths, int min_pcnt, int max_lines,
+			    int context)
+{
+	const char *filename = map->dso->long_name;
+	const char *d_filename = full_paths ? filename : basename(filename);
+	struct annotation *notes = symbol__annotation(sym);
+	struct objdump_line *pos, *queue = NULL;
+	u64 len = sym->end - sym->start;
+	int printed = 2, queue_len = 0;
+	int more = 0;
+
+	printf(" Percent | Source code & Disassembly of %s\n", d_filename);
+	printf("------------------------------------------------\n");
+
+	if (verbose)
+		symbol__annotate_hits(sym, evidx);
+
+	list_for_each_entry(pos, &notes->src->source, node) {
+		int ret;
+
+		/* Start a new context window at this line */
+		if (context && queue == NULL) {
+			queue = pos;
+			queue_len = 0;
+		}
+
+		ret = objdump_line__print(pos, sym, evidx, len, min_pcnt,
+					  printed, max_lines, queue);
+		if (ret == 0) {
+			++printed;
+			if (context) {
+				/* the queued context lines count as printed */
+				printed += queue_len;
+				queue = NULL;
+				queue_len = 0;
+			}
+		} else if (ret == 1) {
+			/* filtered by max_lines */
+			++more;
+		} else if (context) {
+			/*
+			 * Filtered by min_pcnt or non IP lines when
+			 * context != 0: grow the window until it holds
+			 * 'context' lines, then slide it forward.
+			 */
+			if (queue_len == context)
+				queue = list_entry(queue->node.next, typeof(*queue), node);
+			else
+				++queue_len;
+		}
+	}
+
+	return more;
+}
+
+/*
+ * Clear the @evidx histogram of @sym: sizeof_sym_hist bytes starting at the
+ * histogram are set to zero.
+ */
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	memset(annotation__histogram(notes, evidx), 0,
+	       notes->src->sizeof_sym_hist);
+}
+
+/*
+ * Scale the hit count of every parsed objdump line that has an offset inside
+ * the symbol down to 7/8 of its value, and recompute the histogram total
+ * from the scaled counts.
+ */
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct sym_hist *h = annotation__histogram(notes, evidx);
+	struct objdump_line *line;
+	int len = sym->end - sym->start;
+
+	h->sum = 0;
+
+	list_for_each_entry(line, &notes->src->source, node) {
+		/* skip lines without an offset or outside the symbol */
+		if (line->offset == -1 || line->offset >= len)
+			continue;
+
+		h->addr[line->offset] = h->addr[line->offset] * 7 / 8;
+		h->sum += h->addr[line->offset];
+	}
+}
+
+/*
+ * Unlink and free every objdump_line on @head, leaving the list empty.
+ */
+void objdump_line_list__purge(struct list_head *head)
+{
+	struct objdump_line *line, *next;
+
+	list_for_each_entry_safe(line, next, head, node) {
+		/* take it off the list before handing it to the destructor */
+		list_del(&line->node);
+		objdump_line__free(line);
+	}
+}
+
+/*
+ * Annotate @sym and print the result on the terminal.
+ *
+ * With @print_lines a per source line summary is built and printed before
+ * the disassembly, and released afterwards.  The parsed objdump lines are
+ * always purged before returning.
+ *
+ * Returns -1 if symbol__annotate() fails, 0 otherwise.
+ */
+int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
+			 bool print_lines, bool full_paths, int min_pcnt,
+			 int max_lines)
+{
+	const char *filename = map->dso->long_name;
+	struct rb_root source_line = RB_ROOT;
+	u64 len;
+
+	if (symbol__annotate(sym, map, 0) < 0)
+		return -1;
+
+	len = sym->end - sym->start;
+
+	if (print_lines) {
+		symbol__get_source_line(sym, map, evidx, &source_line,
+					len, filename);
+		print_summary(&source_line, filename);
+	}
+
+	/* no context lines on the tty */
+	symbol__annotate_printf(sym, map, evidx, full_paths,
+				min_pcnt, max_lines, 0);
+
+	if (print_lines)
+		symbol__free_source_line(sym, len);
+
+	objdump_line_list__purge(&symbol__annotation(sym)->src->source);
+
+	return 0;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000000..c2c286896801
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,103 @@
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include "types.h"
+#include "symbol.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct objdump_line { /* one entry of the annotated_source 'source' list */
+ struct list_head node; /* link in annotated_source->source */
+ s64 offset; /* index into sym_hist->addr[]; -1 is skipped by the histogram code (presumably a line without an address) */
+ char *line; /* NOTE(review): presumably the text of the objdump output line - confirm */
+};
+
+void objdump_line__free(struct objdump_line *self);
+struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
+ struct objdump_line *pos);
+
+struct sym_hist { /* hit histogram of one symbol for one event */
+ u64 sum; /* total of the addr[] counts */
+ u64 addr[0]; /* hit count per byte offset from the start of the symbol */
+};
+
+struct source_line { /* per offset entry used when 'print_lines' is requested */
+ struct rb_node node; /* link in the rbtree ordered by 'percent' */
+ double percent; /* 100.0 * hits at this offset / histogram sum */
+ char *path; /* line read from addr2line for this offset, heap allocated */
+};
+
+/** struct annotated_source - symbols with hits have this attached as in sannotation
+ *
+ * @source: source parsed from objdump -dS
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ * @nr_histograms: NOTE(review): presumably the number of entries in
+ *                 @histograms, one per event - confirm against the allocator
+ * @sizeof_sym_hist: size in bytes of one entry of @histograms; entry idx
+ *                   starts sizeof_sym_hist * idx bytes into the array
+ * @histograms: Array of addr hit histograms per event being monitored
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histogram array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+ struct list_head source;
+ struct source_line *lines;
+ int nr_histograms;
+ int sizeof_sym_hist;
+ struct sym_hist histograms[0];
+};
+
+struct annotation { /* per symbol annotation state, reached via symbol__annotation() */
+ pthread_mutex_t lock; /* NOTE(review): what it protects is not visible here - presumably 'src' */
+ struct annotated_source *src; /* histograms, parsed objdump lines and source line table */
+};
+
+struct sannotation { /* a symbol together with its annotation */
+ struct annotation annotation; /* what symbol__annotation() returns for 'symbol' */
+ struct symbol symbol; /* symbol__annotation() uses container_of() on a pointer to this member */
+};
+
+/*
+ * Return histogram number @idx of @notes.  The histograms are stored back to
+ * back, each one taking sizeof_sym_hist bytes.
+ */
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+	void *first = (void *)&notes->src->histograms;
+
+	return first + (notes->src->sizeof_sym_hist * idx);
+}
+
+/*
+ * Return the annotation that belongs to @sym.  @sym must be the 'symbol'
+ * member of a struct sannotation.
+ */
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+	struct sannotation *sa = container_of(sym, struct sannotation, symbol);
+
+	return &sa->annotation;
+}
+
+int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+ int evidx, u64 addr);
+int symbol__alloc_hist(struct symbol *sym, int nevents);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
+int symbol__annotate_init(struct map *map __used, struct symbol *sym);
+int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
+ bool full_paths, int min_pcnt, int max_lines,
+ int context);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void objdump_line_list__purge(struct list_head *head);
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
+ bool print_lines, bool full_paths, int min_pcnt,
+ int max_lines);
+
+#ifdef NO_NEWT_SUPPORT
+static inline int symbol__tui_annotate(struct symbol *sym __used,
+ struct map *map __used,
+ int evidx __used, int refresh __used)
+{
+ return 0; /* stub used when NO_NEWT_SUPPORT is defined: ignores all arguments */
+}
+#else /* !NO_NEWT_SUPPORT */
+int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
+ int refresh);
+#endif /* NO_NEWT_SUPPORT */
+
+#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index deffb8c96071..31f934af9861 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,8 +14,8 @@
#include <linux/kernel.h>
#include "debug.h"
-static int build_id__mark_dso_hit(event_t *event,
- struct sample_data *sample __used,
+static int build_id__mark_dso_hit(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session)
{
struct addr_location al;
@@ -37,13 +37,14 @@ static int build_id__mark_dso_hit(event_t *event,
return 0;
}
-static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+static int perf_event__exit_del_thread(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->fork.tid);
+ struct thread *thread = perf_session__findnew(session, event->fork.tid);
- dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
- self->fork.ppid, self->fork.ptid);
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
if (thread) {
rb_erase(&thread->rb_node, &session->threads);
@@ -56,9 +57,9 @@ static int event__exit_del_thread(event_t *self, struct sample_data *sample __us
struct perf_event_ops build_id__mark_dso_hit_ops = {
.sample = build_id__mark_dso_hit,
- .mmap = event__process_mmap,
- .fork = event__process_task,
- .exit = event__exit_del_thread,
+ .mmap = perf_event__process_mmap,
+ .fork = perf_event__process_task,
+ .exit = perf_event__exit_del_thread,
};
char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index a7729797fd96..fc5e5a09d5b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -34,13 +34,14 @@ extern int pager_use_color;
extern int use_browser;
#ifdef NO_NEWT_SUPPORT
-static inline void setup_browser(void)
+static inline void setup_browser(bool fallback_to_pager)
{
- setup_pager();
+ if (fallback_to_pager)
+ setup_pager();
}
static inline void exit_browser(bool wait_for_ok __used) {}
#else
-void setup_browser(void);
+void setup_browser(bool fallback_to_pager);
void exit_browser(bool wait_for_ok);
#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e12d539417b2..9f7106a8d9a4 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
*
* Handle the callchains from the stream in an ad-hoc radix tree and then
* sort them in an rbtree.
@@ -18,7 +18,8 @@
#include "util.h"
#include "callchain.h"
-bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
+bool ip_callchain__valid(struct ip_callchain *chain,
+ const union perf_event *event)
{
unsigned int chain_size = event->header.size;
chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
@@ -26,10 +27,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
}
#define chain_for_each_child(child, parent) \
- list_for_each_entry(child, &parent->children, brothers)
+ list_for_each_entry(child, &parent->children, siblings)
#define chain_for_each_child_safe(child, next, parent) \
- list_for_each_entry_safe(child, next, &parent->children, brothers)
+ list_for_each_entry_safe(child, next, &parent->children, siblings)
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
@@ -38,14 +39,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
- u64 chain_cumul = cumul_hits(chain);
+ u64 chain_cumul = callchain_cumul_hits(chain);
while (*p) {
u64 rnode_cumul;
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
- rnode_cumul = cumul_hits(rnode);
+ rnode_cumul = callchain_cumul_hits(rnode);
switch (mode) {
case CHAIN_FLAT:
@@ -104,7 +105,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_ABS);
}
@@ -129,7 +130,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_rel(child, min_percent);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_REL);
}
@@ -143,7 +144,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
-int register_callchain_param(struct callchain_param *param)
+int callchain_register_param(struct callchain_param *param)
{
switch (param->mode) {
case CHAIN_GRAPH_ABS:
@@ -189,32 +190,27 @@ create_child(struct callchain_node *parent, bool inherit_children)
chain_for_each_child(next, new)
next->parent = new;
}
- list_add_tail(&new->brothers, &parent->children);
+ list_add_tail(&new->siblings, &parent->children);
return new;
}
-struct resolved_ip {
- u64 ip;
- struct map_symbol ms;
-};
-
-struct resolved_chain {
- u64 nr;
- struct resolved_ip ips[0];
-};
-
-
/*
* Fill the node with callchain values
*/
static void
-fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
{
- unsigned int i;
+ struct callchain_cursor_node *cursor_node;
+
+ node->val_nr = cursor->nr - cursor->pos;
+ if (!node->val_nr)
+ pr_warning("Warning: empty node in callchain tree\n");
- for (i = start; i < chain->nr; i++) {
+ cursor_node = callchain_cursor_current(cursor);
+
+ while (cursor_node) {
struct callchain_list *call;
call = zalloc(sizeof(*call));
@@ -222,23 +218,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
perror("not enough memory for the code path tree");
return;
}
- call->ip = chain->ips[i].ip;
- call->ms = chain->ips[i].ms;
+ call->ip = cursor_node->ip;
+ call->ms.sym = cursor_node->sym;
+ call->ms.map = cursor_node->map;
list_add_tail(&call->list, &node->val);
+
+ callchain_cursor_advance(cursor);
+ cursor_node = callchain_cursor_current(cursor);
}
- node->val_nr = chain->nr - start;
- if (!node->val_nr)
- pr_warning("Warning: empty node in callchain tree\n");
}
static void
-add_child(struct callchain_node *parent, struct resolved_chain *chain,
- int start, u64 period)
+add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *new;
new = create_child(parent, false);
- fill_node(new, chain, start);
+ fill_node(new, cursor);
new->children_hit = 0;
new->hit = period;
@@ -250,9 +248,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain,
* Then create another child to host the given callchain of new branch
*/
static void
-split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
- struct callchain_list *to_split, int idx_parents, int idx_local,
- u64 period)
+split_add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ struct callchain_list *to_split,
+ u64 idx_parents, u64 idx_local, u64 period)
{
struct callchain_node *new;
struct list_head *old_tail;
@@ -272,14 +271,14 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
/* split the hits */
new->hit = parent->hit;
new->children_hit = parent->children_hit;
- parent->children_hit = cumul_hits(new);
+ parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
/* create a new child for the new branch if any */
- if (idx_total < chain->nr) {
+ if (idx_total < cursor->nr) {
parent->hit = 0;
- add_child(parent, chain, idx_total, period);
+ add_child(parent, cursor, period);
parent->children_hit += period;
} else {
parent->hit = period;
@@ -287,36 +286,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period);
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period);
static void
-append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain_children(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *rnode;
/* lookup in childrens */
chain_for_each_child(rnode, root) {
- unsigned int ret = append_chain(rnode, chain, start, period);
+ unsigned int ret = append_chain(rnode, cursor, period);
if (!ret)
goto inc_children_hit;
}
/* nothing in children, add to the current node */
- add_child(root, chain, start, period);
+ add_child(root, cursor, period);
inc_children_hit:
root->children_hit += period;
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
+ struct callchain_cursor_node *curr_snap = cursor->curr;
struct callchain_list *cnode;
- unsigned int i = start;
+ u64 start = cursor->pos;
bool found = false;
+ u64 matches;
/*
* Lookup in the current node
@@ -324,141 +328,134 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain,
* anywhere inside a function.
*/
list_for_each_entry(cnode, &root->val, list) {
+ struct callchain_cursor_node *node;
struct symbol *sym;
- if (i == chain->nr)
+ node = callchain_cursor_current(cursor);
+ if (!node)
break;
- sym = chain->ips[i].ms.sym;
+ sym = node->sym;
if (cnode->ms.sym && sym) {
if (cnode->ms.sym->start != sym->start)
break;
- } else if (cnode->ip != chain->ips[i].ip)
+ } else if (cnode->ip != node->ip)
break;
if (!found)
found = true;
- i++;
+
+ callchain_cursor_advance(cursor);
}
/* matches not, relay on the parent */
- if (!found)
+ if (!found) {
+ cursor->curr = curr_snap;
+ cursor->pos = start;
return -1;
+ }
+
+ matches = cursor->pos - start;
/* we match only a part of the node. Split it and add the new chain */
- if (i - start < root->val_nr) {
- split_add_child(root, chain, cnode, start, i - start, period);
+ if (matches < root->val_nr) {
+ split_add_child(root, cursor, cnode, start, matches, period);
return 0;
}
/* we match 100% of the path, increment the hit */
- if (i - start == root->val_nr && i == chain->nr) {
+ if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
return 0;
}
/* We match the node and still have a part remaining */
- append_chain_children(root, chain, i, period);
+ append_chain_children(root, cursor, period);
return 0;
}
-static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
- struct map_symbol *syms)
-{
- int i, j = 0;
-
- for (i = 0; i < (int)old->nr; i++) {
- if (old->ips[i] >= PERF_CONTEXT_MAX)
- continue;
-
- new->ips[j].ip = old->ips[i];
- new->ips[j].ms = syms[i];
- j++;
- }
-
- new->nr = j;
-}
-
-
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period)
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
- struct resolved_chain *filtered;
-
- if (!chain->nr)
+ if (!cursor->nr)
return 0;
- filtered = zalloc(sizeof(*filtered) +
- chain->nr * sizeof(struct resolved_ip));
- if (!filtered)
- return -ENOMEM;
-
- filter_context(chain, filtered, syms);
-
- if (!filtered->nr)
- goto end;
+ callchain_cursor_commit(cursor);
- append_chain_children(&root->node, filtered, 0, period);
+ append_chain_children(&root->node, cursor, period);
- if (filtered->nr > root->max_depth)
- root->max_depth = filtered->nr;
-end:
- free(filtered);
+ if (cursor->nr > root->max_depth)
+ root->max_depth = cursor->nr;
return 0;
}
static int
-merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
- struct resolved_chain *chain)
+merge_chain_branch(struct callchain_cursor *cursor,
+ struct callchain_node *dst, struct callchain_node *src)
{
+ struct callchain_cursor_node **old_last = cursor->last;
struct callchain_node *child, *next_child;
struct callchain_list *list, *next_list;
- int old_pos = chain->nr;
+ int old_pos = cursor->nr;
int err = 0;
list_for_each_entry_safe(list, next_list, &src->val, list) {
- chain->ips[chain->nr].ip = list->ip;
- chain->ips[chain->nr].ms = list->ms;
- chain->nr++;
+ callchain_cursor_append(cursor, list->ip,
+ list->ms.map, list->ms.sym);
list_del(&list->list);
free(list);
}
- if (src->hit)
- append_chain_children(dst, chain, 0, src->hit);
+ if (src->hit) {
+ callchain_cursor_commit(cursor);
+ append_chain_children(dst, cursor, src->hit);
+ }
chain_for_each_child_safe(child, next_child, src) {
- err = merge_chain_branch(dst, child, chain);
+ err = merge_chain_branch(cursor, dst, child);
if (err)
break;
- list_del(&child->brothers);
+ list_del(&child->siblings);
free(child);
}
- chain->nr = old_pos;
+ cursor->nr = old_pos;
+ cursor->last = old_last;
return err;
}
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src)
+{
+ return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+ u64 ip, struct map *map, struct symbol *sym)
{
- struct resolved_chain *chain;
- int err;
+ struct callchain_cursor_node *node = *cursor->last;
- chain = malloc(sizeof(*chain) +
- src->max_depth * sizeof(struct resolved_ip));
- if (!chain)
- return -ENOMEM;
+ if (!node) {
+ node = calloc(sizeof(*node), 1);
+ if (!node)
+ return -ENOMEM;
- chain->nr = 0;
+ *cursor->last = node;
+ }
- err = merge_chain_branch(&dst->node, &src->node, chain);
+ node->ip = ip;
+ node->map = map;
+ node->sym = sym;
- free(chain);
+ cursor->nr++;
- return err;
+ cursor->last = &node->next;
+
+ return 0;
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c15fb8c24ad2..1a79df9f739f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -16,7 +16,7 @@ enum chain_mode {
struct callchain_node {
struct callchain_node *parent;
- struct list_head brothers;
+ struct list_head siblings;
struct list_head children;
struct list_head val;
struct rb_node rb_node; /* to sort nodes in an rbtree */
@@ -49,9 +49,30 @@ struct callchain_list {
struct list_head list;
};
+/*
+ * A callchain cursor is a singly linked list that
+ * lets one feed a callchain progressively.
+ * It keeps persistent allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node { /* one callchain entry held by a cursor */
+ u64 ip; /* instruction pointer of this entry */
+ struct map *map; /* map passed to callchain_cursor_append() with 'ip' */
+ struct symbol *sym; /* symbol passed to callchain_cursor_append() with 'ip' */
+ struct callchain_cursor_node *next; /* next entry of the cursor list */
+};
+
+struct callchain_cursor {
+ u64 nr; /* number of entries appended since the last reset */
+ struct callchain_cursor_node *first; /* head of the entry list */
+ struct callchain_cursor_node **last; /* slot where the next appended entry goes */
+ u64 pos; /* read position: number of entries already consumed */
+ struct callchain_cursor_node *curr; /* read position: entry at 'pos' */
+};
+
static inline void callchain_init(struct callchain_root *root)
{
- INIT_LIST_HEAD(&root->node.brothers);
+ INIT_LIST_HEAD(&root->node.siblings);
INIT_LIST_HEAD(&root->node.children);
INIT_LIST_HEAD(&root->node.val);
@@ -61,15 +82,54 @@ static inline void callchain_init(struct callchain_root *root)
root->max_depth = 0;
}
-static inline u64 cumul_hits(struct callchain_node *node)
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
{
return node->hit + node->children_hit;
}
-int register_callchain_param(struct callchain_param *param);
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period);
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src);
+
+bool ip_callchain__valid(struct ip_callchain *chain,
+ const union perf_event *event);
+/*
+ * Start a new writing session on @cursor: the entry count goes back to zero
+ * and the append slot back to the head of the list.  The 'first' pointer is
+ * left alone, so the entries allocated earlier stay around as a cache.
+ */
+static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+	cursor->last = &cursor->first;
+	cursor->nr = 0;
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+ struct map *map, struct symbol *sym);
-bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
+/*
+ * End the writing session on @cursor and rewind the read position to the
+ * first entry.
+ */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+	cursor->pos = 0;
+	cursor->curr = cursor->first;
+}
+
+/*
+ * Cursor reading iteration helpers
+ *
+ * Return the entry at the read position, or NULL once every appended entry
+ * has been consumed.
+ */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+	return cursor->pos == cursor->nr ? NULL : cursor->curr;
+}
+
+/* Move the read position of @cursor to the next entry */
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+	cursor->pos++;
+	cursor->curr = cursor->curr->next;
+}
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000000..9fea75535221
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,178 @@
+#include "util.h"
+#include "../perf.h"
+#include "parse-options.h"
+#include "evsel.h"
+#include "cgroup.h"
+#include "debugfs.h" /* MAX_PATH, STR() */
+#include "evlist.h"
+
+int nr_cgroups;
+
+/*
+ * Find the mount point of a cgroup filesystem that carries the perf_event
+ * subsystem and copy it, NUL terminated, into @buf (@maxlen bytes).
+ *
+ * Returns 0 on success, -1 if /proc/mounts can't be opened, no such mount
+ * exists or the mount point does not fit in @buf.
+ */
+static int
+cgroupfs_find_mountpoint(char *buf, size_t maxlen)
+{
+	FILE *fp;
+	char mountpoint[MAX_PATH+1], tokens[MAX_PATH+1], type[MAX_PATH+1];
+	char *token, *saved_ptr;
+	int found = 0;
+
+	fp = fopen("/proc/mounts", "r");
+	if (!fp)
+		return -1;
+
+	/*
+	 * in order to handle split hierarchy, we need to scan /proc/mounts
+	 * and inspect every cgroupfs mount point to find one that has
+	 * perf_event subsystem
+	 */
+	while (!found &&
+	       fscanf(fp, "%*s %"STR(MAX_PATH)"s %"STR(MAX_PATH)"s %"
+				STR(MAX_PATH)"s %*d %*d\n",
+				mountpoint, type, tokens) == 3) {
+
+		if (strcmp(type, "cgroup"))
+			continue;
+
+		/* the mount options are a comma separated list */
+		for (token = strtok_r(tokens, ",", &saved_ptr);
+		     token != NULL;
+		     token = strtok_r(NULL, ",", &saved_ptr)) {
+			if (!strcmp(token, "perf_event")) {
+				found = 1;
+				break;
+			}
+		}
+	}
+	fclose(fp);
+
+	if (!found || strlen(mountpoint) >= maxlen)
+		return -1;
+
+	strcpy(buf, mountpoint);
+	return 0;
+}
+
+/*
+ * Open the directory of cgroup @name below the perf_event cgroup mount
+ * point.  Returns the read-only file descriptor, or -1 if the mount point
+ * can't be found or the open fails (the latter is reported on stderr).
+ */
+static int open_cgroup(char *name)
+{
+	char mnt[MAX_PATH+1];
+	char path[MAX_PATH+1];
+	int fd;
+
+	if (cgroupfs_find_mountpoint(mnt, MAX_PATH+1) != 0)
+		return -1;
+
+	snprintf(path, MAX_PATH, "%s/%s", mnt, name);
+
+	fd = open(path, O_RDONLY);
+	if (fd != -1)
+		return fd;
+
+	fprintf(stderr, "no access to cgroup %s\n", path);
+	return fd;
+}
+
+/*
+ * Attach the cgroup named @str to event number nr_cgroups of @evlist.
+ *
+ * If an event of @evlist already refers to a cgroup with that name, its
+ * cgroup_sel is shared; otherwise a new one is allocated and the cgroup
+ * directory opened.
+ *
+ * Ownership of @str: on success it belongs to this function - it either
+ * becomes the name of the new cgroup_sel or, when an existing one is reused,
+ * is freed here.  On failure (-1) it is left untouched for the caller to
+ * free.
+ *
+ * Returns 0 on success, -1 on allocation failure, when the cgroup can't be
+ * opened or when @evlist has no event number nr_cgroups.
+ */
+static int add_cgroup(struct perf_evlist *evlist, char *str)
+{
+	struct perf_evsel *counter;
+	struct cgroup_sel *cgrp = NULL;
+	int reused;
+	int n;
+	/*
+	 * check if cgrp is already defined, if so we reuse it
+	 */
+	list_for_each_entry(counter, &evlist->entries, node) {
+		cgrp = counter->cgrp;
+		if (!cgrp)
+			continue;
+		if (!strcmp(cgrp->name, str))
+			break;
+
+		cgrp = NULL;
+	}
+
+	reused = cgrp != NULL;
+	if (!reused) {
+		cgrp = zalloc(sizeof(*cgrp));
+		if (!cgrp)
+			return -1;
+
+		cgrp->name = str;
+
+		cgrp->fd = open_cgroup(str);
+		if (cgrp->fd == -1) {
+			free(cgrp);
+			return -1;
+		}
+	}
+
+	/*
+	 * find corresponding event
+	 * if add cgroup N, then need to find event N
+	 */
+	n = 0;
+	list_for_each_entry(counter, &evlist->entries, node) {
+		if (n == nr_cgroups)
+			goto found;
+		n++;
+	}
+	/*
+	 * No such event.  A cgroup_sel nobody references was created above,
+	 * so drop it together with the descriptor it holds; the name is the
+	 * caller's to free on failure.
+	 */
+	if (cgrp->refcnt == 0) {
+		close(cgrp->fd);
+		free(cgrp);
+	}
+
+	return -1;
+found:
+	cgrp->refcnt++;
+	counter->cgrp = cgrp;
+	/* the shared cgroup_sel keeps its own name: @str is not needed */
+	if (reused)
+		free(str);
+	return 0;
+}
+
+/*
+ * Drop one reference on @cgrp (NULL is accepted).  When the last reference
+ * goes away the descriptor is closed and the name and the structure freed.
+ */
+void close_cgroup(struct cgroup_sel *cgrp)
+{
+	/* XXX: not reentrant */
+	if (!cgrp || --cgrp->refcnt != 0)
+		return;
+
+	close(cgrp->fd);
+	free(cgrp->name);
+	free(cgrp);
+}
+
+/*
+ * Option callback: parse the comma separated cgroup list in @str and attach
+ * cgroup number N to event number N of the evlist found through opt->value.
+ * Empty list items are allowed and leave the matching event without cgroup.
+ *
+ * Returns 0 on success, -1 if no event was defined yet, on allocation
+ * failure or when a cgroup can't be added.
+ */
+int parse_cgroups(const struct option *opt __used, const char *str,
+		  int unset __used)
+{
+	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+	const char *eos = str + strlen(str);
+	const char *p, *e;
+	char *s;
+
+	if (list_empty(&evlist->entries)) {
+		fprintf(stderr, "must define events before cgroups\n");
+		return -1;
+	}
+
+	for (;;) {
+		p = strchr(str, ',');
+		e = p ? p : eos;
+
+		/* allow empty cgroups, i.e., skip */
+		if (e != str) {
+			/* termination added */
+			s = strndup(str, e - str);
+			if (!s)
+				return -1;
+			if (add_cgroup(evlist, s)) {
+				free(s);
+				return -1;
+			}
+		}
+		/* nr_cgroups is increased even for empty cgroups */
+		nr_cgroups++;
+		if (p == NULL)
+			return 0;
+		str = p + 1;
+	}
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000000..89acd6debdc5
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,17 @@
+#ifndef __CGROUP_H__
+#define __CGROUP_H__
+
+struct option;
+
+struct cgroup_sel { /* a cgroup selected for monitoring, shared between events */
+ char *name; /* cgroup name, heap allocated; freed by close_cgroup() */
+ int fd; /* descriptor returned by open() on the cgroup path */
+ int refcnt; /* number of events pointing at this cgroup_sel */
+};
+
+
+extern int nr_cgroups; /* number of explicit cgroups defined */
+extern void close_cgroup(struct cgroup_sel *cgrp);
+extern int parse_cgroups(const struct option *opt, const char *str, int unset);
+
+#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3ccaa1043383..6893eec693ab 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void)
return cpus;
}
+
+void cpu_map__delete(struct cpu_map *map)
+{
+ free(map); /* the map is released with a single free(); NULL is harmless */
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f7a4f42f6307..072c0a374794 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,6 +8,6 @@ struct cpu_map {
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__dummy_new(void);
-void *cpu_map__delete(struct cpu_map *map);
+void cpu_map__delete(struct cpu_map *map);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 01bbe8ecec3f..d4536a9e0d8c 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -57,7 +57,7 @@ void ui__warning(const char *format, ...)
}
#endif
-void trace_event(event_t *event)
+void trace_event(union perf_event *event)
{
unsigned char *raw_event = (void *)event;
const char *color = PERF_COLOR_BLUE;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index ca35fd66b5df..93516cf4682c 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -9,7 +9,7 @@ extern int verbose;
extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
-void trace_event(event_t *event);
+void trace_event(union perf_event *event);
struct ui_progress;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 50d0a931497a..2b15c362ef56 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -6,8 +6,9 @@
#include "string.h"
#include "strlist.h"
#include "thread.h"
+#include "thread_map.h"
-static const char *event__name[] = {
+static const char *perf_event__names[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_LOST] = "LOST",
@@ -25,16 +26,16 @@ static const char *event__name[] = {
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
};
-const char *event__get_event_name(unsigned int id)
+const char *perf_event__name(unsigned int id)
{
- if (id >= ARRAY_SIZE(event__name))
+ if (id >= ARRAY_SIZE(perf_event__names))
return "INVALID";
- if (!event__name[id])
+ if (!perf_event__names[id])
return "UNKNOWN";
- return event__name[id];
+ return perf_event__names[id];
}
-static struct sample_data synth_sample = {
+static struct perf_sample synth_sample = {
.pid = -1,
.tid = -1,
.time = -1,
@@ -43,9 +44,9 @@ static struct sample_data synth_sample = {
.period = 1,
};
-static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
- event__handler_t process,
- struct perf_session *session)
+static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid,
+ int full, perf_event__handler_t process,
+ struct perf_session *session)
{
char filename[PATH_MAX];
char bf[BUFSIZ];
@@ -126,9 +127,10 @@ out:
return tgid;
}
-static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
- event__handler_t process,
- struct perf_session *session)
+static int perf_event__synthesize_mmap_events(union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
char filename[PATH_MAX];
FILE *fp;
@@ -199,14 +201,14 @@ static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
return 0;
}
-int event__synthesize_modules(event__handler_t process,
- struct perf_session *session,
- struct machine *machine)
+int perf_event__synthesize_modules(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine)
{
struct rb_node *nd;
struct map_groups *kmaps = &machine->kmaps;
- event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
-
+ union perf_event *event = zalloc((sizeof(event->mmap) +
+ session->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -251,23 +253,24 @@ int event__synthesize_modules(event__handler_t process,
return 0;
}
-static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
- pid_t pid, event__handler_t process,
+static int __event__synthesize_thread(union perf_event *comm_event,
+ union perf_event *mmap_event,
+ pid_t pid, perf_event__handler_t process,
struct perf_session *session)
{
- pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+ pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process,
session);
if (tgid == -1)
return -1;
- return event__synthesize_mmap_events(mmap_event, pid, tgid,
+ return perf_event__synthesize_mmap_events(mmap_event, pid, tgid,
process, session);
}
-int event__synthesize_thread_map(struct thread_map *threads,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_thread_map(struct thread_map *threads,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t *comm_event, *mmap_event;
+ union perf_event *comm_event, *mmap_event;
int err = -1, thread;
comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -294,12 +297,12 @@ out:
return err;
}
-int event__synthesize_threads(event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_threads(perf_event__handler_t process,
+ struct perf_session *session)
{
DIR *proc;
struct dirent dirent, *next;
- event_t *comm_event, *mmap_event;
+ union perf_event *comm_event, *mmap_event;
int err = -1;
comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -357,10 +360,10 @@ static int find_symbol_cb(void *arg, const char *name, char type,
return 1;
}
-int event__synthesize_kernel_mmap(event__handler_t process,
- struct perf_session *session,
- struct machine *machine,
- const char *symbol_name)
+int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine,
+ const char *symbol_name)
{
size_t size;
const char *filename, *mmap_name;
@@ -374,8 +377,8 @@ int event__synthesize_kernel_mmap(event__handler_t process,
* kernels.
*/
struct process_symbol_args args = { .name = symbol_name, };
- event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
-
+ union perf_event *event = zalloc((sizeof(event->mmap) +
+ session->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -421,42 +424,15 @@ int event__synthesize_kernel_mmap(event__handler_t process,
return err;
}
-static void thread__comm_adjust(struct thread *self, struct hists *hists)
-{
- char *comm = self->comm;
-
- if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
- (!symbol_conf.comm_list ||
- strlist__has_entry(symbol_conf.comm_list, comm))) {
- u16 slen = strlen(comm);
-
- if (hists__new_col_len(hists, HISTC_COMM, slen))
- hists__set_col_len(hists, HISTC_THREAD, slen + 6);
- }
-}
-
-static int thread__set_comm_adjust(struct thread *self, const char *comm,
- struct hists *hists)
+int perf_event__process_comm(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- int ret = thread__set_comm(self, comm);
-
- if (ret)
- return ret;
-
- thread__comm_adjust(self, hists);
+ struct thread *thread = perf_session__findnew(session, event->comm.tid);
- return 0;
-}
+ dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid);
-int event__process_comm(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
-{
- struct thread *thread = perf_session__findnew(session, self->comm.tid);
-
- dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid);
-
- if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm,
- &session->hists)) {
+ if (thread == NULL || thread__set_comm(thread, event->comm.comm)) {
dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
return -1;
}
@@ -464,19 +440,21 @@ int event__process_comm(event_t *self, struct sample_data *sample __used,
return 0;
}
-int event__process_lost(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_lost(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
- self->lost.id, self->lost.lost);
- session->hists.stats.total_lost += self->lost.lost;
+ event->lost.id, event->lost.lost);
+ session->hists.stats.total_lost += event->lost.lost;
return 0;
}
-static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
+static void perf_event__set_kernel_mmap_len(union perf_event *event,
+ struct map **maps)
{
- maps[MAP__FUNCTION]->start = self->mmap.start;
- maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
+ maps[MAP__FUNCTION]->start = event->mmap.start;
+ maps[MAP__FUNCTION]->end = event->mmap.start + event->mmap.len;
/*
* Be a bit paranoid here, some perf.data file came with
* a zero sized synthesized MMAP event for the kernel.
@@ -485,8 +463,8 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
maps[MAP__FUNCTION]->end = ~0ULL;
}
-static int event__process_kernel_mmap(event_t *self,
- struct perf_session *session)
+static int perf_event__process_kernel_mmap(union perf_event *event,
+ struct perf_session *session)
{
struct map *map;
char kmmap_prefix[PATH_MAX];
@@ -494,9 +472,9 @@ static int event__process_kernel_mmap(event_t *self,
enum dso_kernel_type kernel_type;
bool is_kernel_mmap;
- machine = perf_session__findnew_machine(session, self->mmap.pid);
+ machine = perf_session__findnew_machine(session, event->mmap.pid);
if (!machine) {
- pr_err("Can't find id %d's machine\n", self->mmap.pid);
+ pr_err("Can't find id %d's machine\n", event->mmap.pid);
goto out_problem;
}
@@ -506,17 +484,17 @@ static int event__process_kernel_mmap(event_t *self,
else
kernel_type = DSO_TYPE_GUEST_KERNEL;
- is_kernel_mmap = memcmp(self->mmap.filename,
+ is_kernel_mmap = memcmp(event->mmap.filename,
kmmap_prefix,
strlen(kmmap_prefix)) == 0;
- if (self->mmap.filename[0] == '/' ||
- (!is_kernel_mmap && self->mmap.filename[0] == '[')) {
+ if (event->mmap.filename[0] == '/' ||
+ (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
char short_module_name[1024];
char *name, *dot;
- if (self->mmap.filename[0] == '/') {
- name = strrchr(self->mmap.filename, '/');
+ if (event->mmap.filename[0] == '/') {
+ name = strrchr(event->mmap.filename, '/');
if (name == NULL)
goto out_problem;
@@ -528,10 +506,10 @@ static int event__process_kernel_mmap(event_t *self,
"[%.*s]", (int)(dot - name), name);
strxfrchar(short_module_name, '-', '_');
} else
- strcpy(short_module_name, self->mmap.filename);
+ strcpy(short_module_name, event->mmap.filename);
- map = machine__new_module(machine, self->mmap.start,
- self->mmap.filename);
+ map = machine__new_module(machine, event->mmap.start,
+ event->mmap.filename);
if (map == NULL)
goto out_problem;
@@ -541,9 +519,9 @@ static int event__process_kernel_mmap(event_t *self,
map->dso->short_name = name;
map->dso->sname_alloc = 1;
- map->end = map->start + self->mmap.len;
+ map->end = map->start + event->mmap.len;
} else if (is_kernel_mmap) {
- const char *symbol_name = (self->mmap.filename +
+ const char *symbol_name = (event->mmap.filename +
strlen(kmmap_prefix));
/*
* Should be there already, from the build-id table in
@@ -558,10 +536,10 @@ static int event__process_kernel_mmap(event_t *self,
if (__machine__create_kernel_maps(machine, kernel) < 0)
goto out_problem;
- event_set_kernel_mmap_len(machine->vmlinux_maps, self);
+ perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
symbol_name,
- self->mmap.pgoff);
+ event->mmap.pgoff);
if (machine__is_default_guest(machine)) {
/*
* preload dso of guest kernel and modules
@@ -575,22 +553,23 @@ out_problem:
return -1;
}
-int event__process_mmap(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_mmap(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
struct machine *machine;
struct thread *thread;
struct map *map;
- u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
int ret = 0;
dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
- self->mmap.pid, self->mmap.tid, self->mmap.start,
- self->mmap.len, self->mmap.pgoff, self->mmap.filename);
+ event->mmap.pid, event->mmap.tid, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff, event->mmap.filename);
if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
cpumode == PERF_RECORD_MISC_KERNEL) {
- ret = event__process_kernel_mmap(self, session);
+ ret = perf_event__process_kernel_mmap(event, session);
if (ret < 0)
goto out_problem;
return 0;
@@ -599,12 +578,12 @@ int event__process_mmap(event_t *self, struct sample_data *sample __used,
machine = perf_session__find_host_machine(session);
if (machine == NULL)
goto out_problem;
- thread = perf_session__findnew(session, self->mmap.pid);
+ thread = perf_session__findnew(session, event->mmap.pid);
if (thread == NULL)
goto out_problem;
- map = map__new(&machine->user_dsos, self->mmap.start,
- self->mmap.len, self->mmap.pgoff,
- self->mmap.pid, self->mmap.filename,
+ map = map__new(&machine->user_dsos, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ event->mmap.pid, event->mmap.filename,
MAP__FUNCTION);
if (map == NULL)
goto out_problem;
@@ -617,16 +596,17 @@ out_problem:
return 0;
}
-int event__process_task(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_task(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->fork.tid);
- struct thread *parent = perf_session__findnew(session, self->fork.ptid);
+ struct thread *thread = perf_session__findnew(session, event->fork.tid);
+ struct thread *parent = perf_session__findnew(session, event->fork.ptid);
- dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
- self->fork.ppid, self->fork.ptid);
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
- if (self->header.type == PERF_RECORD_EXIT) {
+ if (event->header.type == PERF_RECORD_EXIT) {
perf_session__remove_thread(session, thread);
return 0;
}
@@ -640,20 +620,22 @@ int event__process_task(event_t *self, struct sample_data *sample __used,
return 0;
}
-int event__process(event_t *event, struct sample_data *sample,
- struct perf_session *session)
+int perf_event__process(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session)
{
switch (event->header.type) {
case PERF_RECORD_COMM:
- event__process_comm(event, sample, session);
+ perf_event__process_comm(event, sample, session);
break;
case PERF_RECORD_MMAP:
- event__process_mmap(event, sample, session);
+ perf_event__process_mmap(event, sample, session);
break;
case PERF_RECORD_FORK:
case PERF_RECORD_EXIT:
- event__process_task(event, sample, session);
+ perf_event__process_task(event, sample, session);
break;
+ case PERF_RECORD_LOST:
+ perf_event__process_lost(event, sample, session);
default:
break;
}
@@ -750,24 +732,14 @@ void thread__find_addr_location(struct thread *self,
al->sym = NULL;
}
-static void dso__calc_col_width(struct dso *self, struct hists *hists)
-{
- if (!symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
- (!symbol_conf.dso_list ||
- strlist__has_entry(symbol_conf.dso_list, self->name))) {
- u16 slen = dso__name_len(self);
- hists__new_col_len(hists, HISTC_DSO, slen);
- }
-
- self->slen_calculated = 1;
-}
-
-int event__preprocess_sample(const event_t *self, struct perf_session *session,
- struct addr_location *al, struct sample_data *data,
- symbol_filter_t filter)
+int perf_event__preprocess_sample(const union perf_event *event,
+ struct perf_session *session,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ symbol_filter_t filter)
{
- u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct thread *thread = perf_session__findnew(session, self->ip.pid);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL)
return -1;
@@ -789,12 +761,12 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
machine__create_kernel_maps(&session->host_machine);
thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- self->ip.pid, self->ip.ip, al);
+ event->ip.pid, event->ip.ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
al->sym = NULL;
- al->cpu = data->cpu;
+ al->cpu = sample->cpu;
if (al->map) {
if (symbol_conf.dso_list &&
@@ -805,23 +777,8 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
strlist__has_entry(symbol_conf.dso_list,
al->map->dso->long_name)))))
goto out_filtered;
- /*
- * We have to do this here as we may have a dso with no symbol
- * hit that has a name longer than the ones with symbols
- * sampled.
- */
- if (!sort_dso.elide && !al->map->dso->slen_calculated)
- dso__calc_col_width(al->map->dso, &session->hists);
al->sym = map__find_symbol(al->map, al->addr, filter);
- } else {
- const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
-
- if (hists__col_len(&session->hists, HISTC_DSO) < unresolved_col_width &&
- !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
- !symbol_conf.dso_list)
- hists__set_col_len(&session->hists, HISTC_DSO,
- unresolved_col_width);
}
if (symbol_conf.sym_list && al->sym &&
@@ -834,128 +791,3 @@ out_filtered:
al->filtered = true;
return 0;
}
-
-static int event__parse_id_sample(const event_t *event,
- struct perf_session *session,
- struct sample_data *sample)
-{
- const u64 *array;
- u64 type;
-
- sample->cpu = sample->pid = sample->tid = -1;
- sample->stream_id = sample->id = sample->time = -1ULL;
-
- if (!session->sample_id_all)
- return 0;
-
- array = event->sample.array;
- array += ((event->header.size -
- sizeof(event->header)) / sizeof(u64)) - 1;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- sample->cpu = *p;
- array--;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- sample->stream_id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_ID) {
- sample->id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- sample->time = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- sample->pid = p[0];
- sample->tid = p[1];
- }
-
- return 0;
-}
-
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *data)
-{
- const u64 *array;
- u64 type;
-
- if (event->header.type != PERF_RECORD_SAMPLE)
- return event__parse_id_sample(event, session, data);
-
- array = event->sample.array;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_IP) {
- data->ip = event->ip.ip;
- array++;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- data->pid = p[0];
- data->tid = p[1];
- array++;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- data->time = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_ADDR) {
- data->addr = *array;
- array++;
- }
-
- data->id = -1ULL;
- if (type & PERF_SAMPLE_ID) {
- data->id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- data->stream_id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- data->cpu = *p;
- array++;
- } else
- data->cpu = -1;
-
- if (type & PERF_SAMPLE_PERIOD) {
- data->period = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_READ) {
- pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
- return -1;
- }
-
- if (type & PERF_SAMPLE_CALLCHAIN) {
- data->callchain = (struct ip_callchain *)array;
- array += 1 + data->callchain->nr;
- }
-
- if (type & PERF_SAMPLE_RAW) {
- u32 *p = (u32 *)array;
- data->raw_size = *p;
- p++;
- data->raw_data = p;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cc7b52f9b492..9c35170fb379 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -61,7 +61,7 @@ struct sample_event {
u64 array[];
};
-struct sample_data {
+struct perf_sample {
u64 ip;
u32 pid, tid;
u64 time;
@@ -117,7 +117,7 @@ struct tracing_data_event {
u32 size;
};
-typedef union event_union {
+union perf_event {
struct perf_event_header header;
struct ip_event ip;
struct mmap_event mmap;
@@ -130,50 +130,54 @@ typedef union event_union {
struct event_type_event event_type;
struct tracing_data_event tracing_data;
struct build_id_event build_id;
-} event_t;
+};
-void event__print_totals(void);
+void perf_event__print_totals(void);
struct perf_session;
struct thread_map;
-typedef int (*event__handler_synth_t)(event_t *event,
+typedef int (*perf_event__handler_synth_t)(union perf_event *event,
+ struct perf_session *session);
+typedef int (*perf_event__handler_t)(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session);
-typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
- struct perf_session *session);
-
-int event__synthesize_thread_map(struct thread_map *threads,
- event__handler_t process,
- struct perf_session *session);
-int event__synthesize_threads(event__handler_t process,
- struct perf_session *session);
-int event__synthesize_kernel_mmap(event__handler_t process,
- struct perf_session *session,
- struct machine *machine,
- const char *symbol_name);
-
-int event__synthesize_modules(event__handler_t process,
- struct perf_session *session,
- struct machine *machine);
-
-int event__process_comm(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_lost(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_mmap(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_task(event_t *self, struct sample_data *sample,
+
+int perf_event__synthesize_thread_map(struct thread_map *threads,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_threads(perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine,
+ const char *symbol_name);
+
+int perf_event__synthesize_modules(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine);
+
+int perf_event__process_comm(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_lost(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_task(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process(union perf_event *event, struct perf_sample *sample,
struct perf_session *session);
-int event__process(event_t *event, struct sample_data *sample,
- struct perf_session *session);
struct addr_location;
-int event__preprocess_sample(const event_t *self, struct perf_session *session,
- struct addr_location *al, struct sample_data *data,
- symbol_filter_t filter);
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *sample);
+int perf_event__preprocess_sample(const union perf_event *self,
+ struct perf_session *session,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ symbol_filter_t filter);
+
+const char *perf_event__name(unsigned int id);
-const char *event__get_event_name(unsigned int id);
+int perf_event__parse_sample(const union perf_event *event, u64 type,
+ bool sample_id_all, struct perf_sample *sample);
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000000..d852cefa20de
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include <poll.h>
+#include "cpumap.h"
+#include "thread_map.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+
+#include <sys/mman.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+ INIT_LIST_HEAD(&evlist->entries);
+ perf_evlist__set_maps(evlist, cpus, threads);
+}
+
+struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ struct perf_evlist *evlist = zalloc(sizeof(*evlist));
+
+ if (evlist != NULL)
+ perf_evlist__init(evlist, cpus, threads);
+
+ return evlist;
+}
+
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+ list_del_init(&pos->node);
+ perf_evsel__delete(pos);
+ }
+
+ evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+ free(evlist->mmap);
+ free(evlist->pollfd);
+ evlist->mmap = NULL;
+ evlist->pollfd = NULL;
+}
+
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+ perf_evlist__purge(evlist);
+ perf_evlist__exit(evlist);
+ free(evlist);
+}
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
+{
+ list_add_tail(&entry->node, &evlist->entries);
+ ++evlist->nr_entries;
+}
+
+int perf_evlist__add_default(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+ int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
+ evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
+ return evlist->pollfd != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ evlist->pollfd[evlist->nr_fds].fd = fd;
+ evlist->pollfd[evlist->nr_fds].events = POLLIN;
+ evlist->nr_fds++;
+}
+
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, u64 id)
+{
+ int hash;
+ struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+ sid->id = id;
+ sid->evsel = evsel;
+ hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
+ hlist_add_head(&sid->node, &evlist->heads[hash]);
+}
+
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, u64 id)
+{
+ perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+ evsel->id[evsel->ids++] = id;
+}
+
+static int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd)
+{
+ u64 read_data[4] = { 0, };
+ int id_idx = 1; /* The first entry is the counter value */
+
+ if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+ read(fd, &read_data, sizeof(read_data)) == -1)
+ return -1;
+
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ ++id_idx;
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ ++id_idx;
+
+ perf_evlist__id_add(evlist, evsel, cpu, thread, read_data[id_idx]);
+ return 0;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+ struct hlist_head *head;
+ struct hlist_node *pos;
+ struct perf_sample_id *sid;
+ int hash;
+
+ if (evlist->nr_entries == 1)
+ return list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->heads[hash];
+
+ hlist_for_each_entry(sid, pos, head, node)
+ if (sid->id == id)
+ return sid->evsel;
+ return NULL;
+}
+
+union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
+{
+ /* XXX Move this to perf.c, making it generally available */
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ struct perf_mmap *md = &evlist->mmap[cpu];
+ unsigned int head = perf_mmap__read_head(md);
+ unsigned int old = md->prev;
+ unsigned char *data = md->base + page_size;
+ union perf_event *event = NULL;
+
+ if (evlist->overwrite) {
+ /*
+ * If we're further behind than half the buffer, there's a chance
+ * the writer will bite our tail and mess up the samples under us.
+ *
+ * If we somehow ended up ahead of the head, we got messed up.
+ *
+ * In either case, truncate and restart at head.
+ */
+ int diff = head - old;
+ if (diff > md->mask / 2 || diff < 0) {
+ fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
+
+ /*
+ * head points to a known good entry, start there.
+ */
+ old = head;
+ }
+ }
+
+ if (old != head) {
+ size_t size;
+
+ event = (union perf_event *)&data[old & md->mask];
+ size = event->header.size;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((old & md->mask) + size != ((old + size) & md->mask)) {
+ unsigned int offset = old;
+ unsigned int len = min(sizeof(*event), size), cpy;
+ void *dst = &evlist->event_copy;
+
+ do {
+ cpy = min(md->mask + 1 - (offset & md->mask), len);
+ memcpy(dst, &data[offset & md->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = &evlist->event_copy;
+ }
+
+ old += size;
+ }
+
+ md->prev = old;
+
+ if (!evlist->overwrite)
+ perf_mmap__write_tail(md, old);
+
+ return event;
+}
+
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+}
+
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+{
+ evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap));
+ return evlist->mmap != NULL ? 0 : -ENOMEM;
+}
+
+static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
+ int mask, int fd)
+{
+ evlist->mmap[cpu].prev = 0;
+ evlist->mmap[cpu].mask = mask;
+ evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
+ MAP_SHARED, fd, 0);
+ if (evlist->mmap[cpu].base == MAP_FAILED)
+ return -1;
+
+ perf_evlist__add_pollfd(evlist, fd);
+ return 0;
+}
+
+/** perf_evlist__mmap - Create per cpu maps to receive events
+ *
+ * @evlist - list of events
+ * @pages - map length in pages
+ * @overwrite - overwrite older events?
+ *
+ * If overwrite is false the user needs to signal event consumption using:
+ *
+ * struct perf_mmap *m = &evlist->mmap[cpu];
+ * unsigned int head = perf_mmap__read_head(m);
+ *
+ * perf_mmap__write_tail(m, head)
+ *
+ * Using perf_evlist__read_on_cpu does this automatically.
+ */
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
+{
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ int mask = pages * page_size - 1, cpu;
+ struct perf_evsel *first_evsel, *evsel;
+ const struct cpu_map *cpus = evlist->cpus;
+ const struct thread_map *threads = evlist->threads;
+ int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
+
+ if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
+ return -ENOMEM;
+
+ if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+ return -ENOMEM;
+
+ evlist->overwrite = overwrite;
+ evlist->mmap_len = (pages + 1) * page_size;
+ first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->sample_id == NULL &&
+ perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ int fd = FD(evsel, cpu, thread);
+
+ if (evsel->idx || thread) {
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+ FD(first_evsel, cpu, 0)) != 0)
+ goto out_unmap;
+ } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
+ goto out_unmap;
+
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0)
+ goto out_unmap;
+ }
+ }
+ }
+
+ return 0;
+
+out_unmap:
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+ return -1;
+}
+
+int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
+ pid_t target_tid, const char *cpu_list)
+{
+ evlist->threads = thread_map__new(target_pid, target_tid);
+
+ if (evlist->threads == NULL)
+ return -1;
+
+ if (target_tid != -1)
+ evlist->cpus = cpu_map__dummy_new();
+ else
+ evlist->cpus = cpu_map__new(cpu_list);
+
+ if (evlist->cpus == NULL)
+ goto out_delete_threads;
+
+ return 0;
+
+out_delete_threads:
+ thread_map__delete(evlist->threads);
+ return -1;
+}
+
+void perf_evlist__delete_maps(struct perf_evlist *evlist)
+{
+ cpu_map__delete(evlist->cpus);
+ thread_map__delete(evlist->threads);
+ evlist->cpus = NULL;
+ evlist->threads = NULL;
+}
+
+int perf_evlist__set_filters(struct perf_evlist *evlist)
+{
+ const struct thread_map *threads = evlist->threads;
+ const struct cpu_map *cpus = evlist->cpus;
+ struct perf_evsel *evsel;
+ char *filter;
+ int thread;
+ int cpu;
+ int err;
+ int fd;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ filter = evsel->filter;
+ if (!filter)
+ continue;
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ fd = FD(evsel, cpu, thread);
+ err = ioctl(fd, PERF_EVENT_IOC_SET_FILTER, filter);
+ if (err)
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000000..8b1cb7a4c5f1
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,68 @@
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/list.h>
+#include "../perf.h"
+#include "event.h"
+
+struct pollfd;
+struct thread_map;
+struct cpu_map;
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+struct perf_evlist { /* a list of event selectors plus the state shared by all of them */
+ struct list_head entries; /* the perf_evsel entries, linked through perf_evsel->node */
+ struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; /* hash buckets; presumably the id -> evsel lookup table - TODO confirm in evlist.c */
+ int nr_entries; /* number of entries; used to size the attrs section when the header is written */
+ int nr_fds; /* NOTE(review): presumably the number of used slots in pollfd - confirm */
+ int mmap_len; /* length passed to munmap() for each mapping in mmap[] */
+ bool overwrite; /* NOTE(review): presumably the overwrite flag given to perf_evlist__mmap() - confirm */
+ union perf_event event_copy; /* NOTE(review): looks like scratch space for one event - confirm */
+ struct perf_mmap *mmap; /* array of mappings, indexed by cpu index */
+ struct pollfd *pollfd; /* NOTE(review): presumably allocated by perf_evlist__alloc_pollfd() - confirm */
+ struct thread_map *threads; /* thread map, set by perf_evlist__set_maps()/perf_evlist__create_maps() */
+ struct cpu_map *cpus; /* cpu map, set by perf_evlist__set_maps()/perf_evlist__create_maps() */
+};
+
+struct perf_evsel;
+
+struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evlist__exit(struct perf_evlist *evlist);
+void perf_evlist__delete(struct perf_evlist *evlist);
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+int perf_evlist__add_default(struct perf_evlist *evlist);
+
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, u64 id);
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+
+union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
+
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
+void perf_evlist__munmap(struct perf_evlist *evlist);
+
+/* Attach the caller-provided @cpus and @threads maps to @evlist. */
+static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
+					 struct cpu_map *cpus,
+					 struct thread_map *threads)
+{
+	evlist->threads = threads;
+	evlist->cpus = cpus;
+}
+
+int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
+ pid_t target_tid, const char *cpu_list);
+void perf_evlist__delete_maps(struct perf_evlist *evlist);
+int perf_evlist__set_filters(struct perf_evlist *evlist);
+
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d8575d31ee6c..662596afd7f1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,20 +1,34 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
#include "evsel.h"
-#include "../perf.h"
+#include "evlist.h"
#include "util.h"
#include "cpumap.h"
-#include "thread.h"
+#include "thread_map.h"
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+/*
+ * Initialize an already allocated @evsel: set up its list node, take a
+ * copy of @attr and record its index @idx.
+ */
+void perf_evsel__init(struct perf_evsel *evsel,
+		      struct perf_event_attr *attr, int idx)
+{
+	INIT_LIST_HEAD(&evsel->node);
+	evsel->attr = *attr;
+	evsel->idx = idx;
+}
+
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
- if (evsel != NULL) {
- evsel->idx = idx;
- evsel->attr = *attr;
- INIT_LIST_HEAD(&evsel->node);
- }
+ if (evsel != NULL)
+ perf_evsel__init(evsel, attr, idx);
return evsel;
}
@@ -25,6 +39,22 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+/*
+ * Allocate the id storage of @evsel: an @ncpus x @nthreads xyarray of
+ * struct perf_sample_id plus a zeroed array of @ncpus * @nthreads u64 ids.
+ *
+ * Returns 0 on success.  Returns -ENOMEM if either allocation fails, in
+ * which case evsel->sample_id is left NULL.
+ */
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	evsel->sample_id = xyarray__new(ncpus, nthreads,
+					sizeof(struct perf_sample_id));
+	if (evsel->sample_id != NULL) {
+		evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+		if (evsel->id != NULL)
+			return 0;
+
+		/* Second allocation failed: undo the first one. */
+		xyarray__delete(evsel->sample_id);
+		evsel->sample_id = NULL;
+	}
+
+	return -ENOMEM;
+}
+
+
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -38,6 +68,14 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
evsel->fd = NULL;
}
+/* Release the id storage of @evsel and clear the pointers to it. */
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+	xyarray__delete(evsel->sample_id);
+	free(evsel->id);
+
+	evsel->sample_id = NULL;
+	evsel->id = NULL;
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -49,10 +87,19 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
-void perf_evsel__delete(struct perf_evsel *evsel)
+void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
xyarray__delete(evsel->fd);
+ xyarray__delete(evsel->sample_id);
+ free(evsel->id);
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+ perf_evsel__exit(evsel);
+ close_cgroup(evsel->cgrp);
+ free(evsel->name);
free(evsel);
}
@@ -128,21 +175,51 @@ int __perf_evsel__read(struct perf_evsel *evsel,
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
- struct thread_map *threads)
+ struct thread_map *threads, bool group, bool inherit)
{
int cpu, thread;
+ unsigned long flags = 0;
+ int pid = -1;
if (evsel->fd == NULL &&
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -1;
+ if (evsel->cgrp) {
+ flags = PERF_FLAG_PID_CGROUP;
+ pid = evsel->cgrp->fd;
+ }
+
for (cpu = 0; cpu < cpus->nr; cpu++) {
+ int group_fd = -1;
+ /*
+ * Don't allow mmap() of inherited per-task counters. This
+ * would create a performance issue due to all children writing
+ * to the same buffer.
+ *
+ * FIXME:
+ * Proper fix is not to pass 'inherit' to perf_evsel__open*,
+ * but a 'flags' parameter, with 'group' folded there as well,
+ * then introduce a PERF_O_{MMAP,GROUP,INHERIT} enum, and if
+ * O_MMAP is set, emit a warning if cpu < 0 and O_INHERIT is
+ * set. Lets go for the minimal fix first tho.
+ */
+ evsel->attr.inherit = (cpus->map[cpu] >= 0) && inherit;
+
for (thread = 0; thread < threads->nr; thread++) {
+
+ if (!evsel->cgrp)
+ pid = threads->map[thread];
+
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
- threads->map[thread],
- cpus->map[cpu], -1, 0);
+ pid,
+ cpus->map[cpu],
+ group_fd, flags);
if (FD(evsel, cpu, thread) < 0)
goto out_close;
+
+ if (group && group_fd == -1)
+ group_fd = FD(evsel, cpu, thread);
}
}
@@ -175,10 +252,9 @@ static struct {
.threads = { -1, },
};
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit)
{
-
if (cpus == NULL) {
/* Work around old compiler warnings about strict aliasing */
cpus = &empty_cpu_map.map;
@@ -187,15 +263,135 @@ int perf_evsel__open(struct perf_evsel *evsel,
if (threads == NULL)
threads = &empty_thread_map.map;
- return __perf_evsel__open(evsel, cpus, threads);
+ return __perf_evsel__open(evsel, cpus, threads, group, inherit);
}
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus, bool group, bool inherit)
{
- return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+ return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
+}
+
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads, bool group, bool inherit)
+{
+ return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
+}
+
+static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
+ struct perf_sample *sample)
+{ /* Fill @sample from the id fields selected by @type, reading them from the tail of @event; always returns 0. */
+ const u64 *array = event->sample.array;
+ /* Jump to the last u64 slot; assumes sample.array starts right after the header - TODO confirm. */
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1;
+ /* From here on the fields are consumed back to front, one u64 slot each. */
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array;
+ sample->cpu = *p; /* only the first u32 of the slot is read */
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+ /* Last field handled, so the pointer is not stepped back any further. */
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array;
+ sample->pid = p[0]; /* first u32 of the slot */
+ sample->tid = p[1]; /* second u32 of the slot */
+ }
+
+ return 0;
 }
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+int perf_event__parse_sample(const union perf_event *event, u64 type,
+ bool sample_id_all, struct perf_sample *data)
{
- return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
+ const u64 *array;
+
+ data->cpu = data->pid = data->tid = -1;
+ data->stream_id = data->id = data->time = -1ULL;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ if (!sample_id_all)
+ return 0;
+ return perf_event__parse_id_sample(event, type, data);
+ }
+
+ array = event->sample.array;
+
+ if (type & PERF_SAMPLE_IP) {
+ data->ip = event->ip.ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array;
+ data->pid = p[0];
+ data->tid = p[1];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ data->time = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ data->addr = *array;
+ array++;
+ }
+
+ data->id = -1ULL;
+ if (type & PERF_SAMPLE_ID) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ data->stream_id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array;
+ data->cpu = *p;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ data->period = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ data->callchain = (struct ip_callchain *)array;
+ array += 1 + data->callchain->nr;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ u32 *p = (u32 *)array;
+ data->raw_size = *p;
+ p++;
+ data->raw_data = p;
+ }
+
+ return 0;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b2d755fe88a5..6710ab538342 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -6,6 +6,8 @@
#include "../../../include/linux/perf_event.h"
#include "types.h"
#include "xyarray.h"
+#include "cgroup.h"
+#include "hist.h"
struct perf_counts_values {
union {
@@ -24,31 +26,66 @@ struct perf_counts {
struct perf_counts_values cpu[];
};
+struct perf_evsel;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there is
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+ struct hlist_node node; /* hash-list linkage; presumably chained off perf_evlist->heads - TODO confirm */
+ u64 id; /* the sample id being mapped */
+ struct perf_evsel *evsel; /* the evsel this id maps back to */
+};
+
+/** struct perf_evsel - event selector
+ *
+ * @name - Can be set to retain the original event name passed by the user,
+ * so that when showing results in tools such as 'perf stat', we
+ * show the name used, not some alias.
+ */
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
+ struct xyarray *sample_id;
+ u64 *id;
struct perf_counts *counts;
int idx;
- void *priv;
+ int ids;
+ struct hists hists;
+ char *name;
+ union {
+ void *priv;
+ off_t id_offset;
+ };
+ struct cgroup_sel *cgrp;
};
struct cpu_map;
struct thread_map;
+struct perf_evlist;
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__free_id(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads);
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus, bool group, bool inherit);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads, bool group, bool inherit);
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit);
#define perf_evsel__match(evsel, t, c) \
(evsel->attr.type == PERF_TYPE_##t && \
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c
index 67eeff571568..7adf4ad15d8f 100644
--- a/tools/perf/util/exec_cmd.c
+++ b/tools/perf/util/exec_cmd.c
@@ -11,31 +11,12 @@ static const char *argv0_path;
const char *system_path(const char *path)
{
-#ifdef RUNTIME_PREFIX
- static const char *prefix;
-#else
static const char *prefix = PREFIX;
-#endif
struct strbuf d = STRBUF_INIT;
if (is_absolute_path(path))
return path;
-#ifdef RUNTIME_PREFIX
- assert(argv0_path);
- assert(is_absolute_path(argv0_path));
-
- if (!prefix &&
- !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) &&
- !(prefix = strip_path_suffix(argv0_path, BINDIR)) &&
- !(prefix = strip_path_suffix(argv0_path, "perf"))) {
- prefix = PREFIX;
- fprintf(stderr, "RUNTIME_PREFIX requested, "
- "but prefix computation failed. "
- "Using static fallback '%s'.\n", prefix);
- }
-#endif
-
strbuf_addf(&d, "%s/%s", prefix, path);
path = strbuf_detach(&d, NULL);
return path;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f6a929e74981..e5230c0ef95b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,8 @@
#include <linux/list.h>
#include <linux/kernel.h>
+#include "evlist.h"
+#include "evsel.h"
#include "util.h"
#include "header.h"
#include "../perf.h"
@@ -18,89 +20,6 @@
static bool no_buildid_cache = false;
-/*
- * Create new perf.data header attribute:
- */
-struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr)
-{
- struct perf_header_attr *self = malloc(sizeof(*self));
-
- if (self != NULL) {
- self->attr = *attr;
- self->ids = 0;
- self->size = 1;
- self->id = malloc(sizeof(u64));
- if (self->id == NULL) {
- free(self);
- self = NULL;
- }
- }
-
- return self;
-}
-
-void perf_header_attr__delete(struct perf_header_attr *self)
-{
- free(self->id);
- free(self);
-}
-
-int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
-{
- int pos = self->ids;
-
- self->ids++;
- if (self->ids > self->size) {
- int nsize = self->size * 2;
- u64 *nid = realloc(self->id, nsize * sizeof(u64));
-
- if (nid == NULL)
- return -1;
-
- self->size = nsize;
- self->id = nid;
- }
- self->id[pos] = id;
- return 0;
-}
-
-int perf_header__init(struct perf_header *self)
-{
- self->size = 1;
- self->attr = malloc(sizeof(void *));
- return self->attr == NULL ? -ENOMEM : 0;
-}
-
-void perf_header__exit(struct perf_header *self)
-{
- int i;
- for (i = 0; i < self->attrs; ++i)
- perf_header_attr__delete(self->attr[i]);
- free(self->attr);
-}
-
-int perf_header__add_attr(struct perf_header *self,
- struct perf_header_attr *attr)
-{
- if (self->frozen)
- return -1;
-
- if (self->attrs == self->size) {
- int nsize = self->size * 2;
- struct perf_header_attr **nattr;
-
- nattr = realloc(self->attr, nsize * sizeof(void *));
- if (nattr == NULL)
- return -1;
-
- self->size = nsize;
- self->attr = nattr;
- }
-
- self->attr[self->attrs++] = attr;
- return 0;
-}
-
static int event_count;
static struct perf_trace_event_type *events;
@@ -147,19 +66,19 @@ struct perf_file_attr {
struct perf_file_section ids;
};
-void perf_header__set_feat(struct perf_header *self, int feat)
+void perf_header__set_feat(struct perf_header *header, int feat)
{
- set_bit(feat, self->adds_features);
+ set_bit(feat, header->adds_features);
}
-void perf_header__clear_feat(struct perf_header *self, int feat)
+void perf_header__clear_feat(struct perf_header *header, int feat)
{
- clear_bit(feat, self->adds_features);
+ clear_bit(feat, header->adds_features);
}
-bool perf_header__has_feat(const struct perf_header *self, int feat)
+bool perf_header__has_feat(const struct perf_header *header, int feat)
{
- return test_bit(feat, self->adds_features);
+ return test_bit(feat, header->adds_features);
}
static int do_write(int fd, const void *buf, size_t size)
@@ -228,22 +147,22 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
return 0;
}
-static int machine__write_buildid_table(struct machine *self, int fd)
+static int machine__write_buildid_table(struct machine *machine, int fd)
{
int err;
u16 kmisc = PERF_RECORD_MISC_KERNEL,
umisc = PERF_RECORD_MISC_USER;
- if (!machine__is_host(self)) {
+ if (!machine__is_host(machine)) {
kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
umisc = PERF_RECORD_MISC_GUEST_USER;
}
- err = __dsos__write_buildid_table(&self->kernel_dsos, self->pid,
+ err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid,
kmisc, fd);
if (err == 0)
- err = __dsos__write_buildid_table(&self->user_dsos,
- self->pid, umisc, fd);
+ err = __dsos__write_buildid_table(&machine->user_dsos,
+ machine->pid, umisc, fd);
return err;
}
@@ -270,11 +189,15 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms)
{
const size_t size = PATH_MAX;
- char *realname = realpath(name, NULL),
- *filename = malloc(size),
+ char *realname, *filename = malloc(size),
*linkname = malloc(size), *targetname;
int len, err = -1;
+ if (is_kallsyms)
+ realname = (char *)name;
+ else
+ realname = realpath(name, NULL);
+
if (realname == NULL || filename == NULL || linkname == NULL)
goto out_free;
@@ -306,7 +229,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
if (symlink(targetname, linkname) == 0)
err = 0;
out_free:
- free(realname);
+ if (!is_kallsyms)
+ free(realname);
free(filename);
free(linkname);
return err;
@@ -361,12 +285,12 @@ out_free:
return err;
}
-static int dso__cache_build_id(struct dso *self, const char *debugdir)
+static int dso__cache_build_id(struct dso *dso, const char *debugdir)
{
- bool is_kallsyms = self->kernel && self->long_name[0] != '/';
+ bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
- return build_id_cache__add_b(self->build_id, sizeof(self->build_id),
- self->long_name, debugdir, is_kallsyms);
+ return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id),
+ dso->long_name, debugdir, is_kallsyms);
}
static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
@@ -381,14 +305,14 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
return err;
}
-static int machine__cache_build_ids(struct machine *self, const char *debugdir)
+static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
{
- int ret = __dsos__cache_build_ids(&self->kernel_dsos, debugdir);
- ret |= __dsos__cache_build_ids(&self->user_dsos, debugdir);
+ int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir);
+ ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir);
return ret;
}
-static int perf_session__cache_build_ids(struct perf_session *self)
+static int perf_session__cache_build_ids(struct perf_session *session)
{
struct rb_node *nd;
int ret;
@@ -399,28 +323,28 @@ static int perf_session__cache_build_ids(struct perf_session *self)
if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
return -1;
- ret = machine__cache_build_ids(&self->host_machine, debugdir);
+ ret = machine__cache_build_ids(&session->host_machine, debugdir);
- for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__cache_build_ids(pos, debugdir);
}
return ret ? -1 : 0;
}
-static bool machine__read_build_ids(struct machine *self, bool with_hits)
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
{
- bool ret = __dsos__read_build_ids(&self->kernel_dsos, with_hits);
- ret |= __dsos__read_build_ids(&self->user_dsos, with_hits);
+ bool ret = __dsos__read_build_ids(&machine->kernel_dsos, with_hits);
+ ret |= __dsos__read_build_ids(&machine->user_dsos, with_hits);
return ret;
}
-static bool perf_session__read_build_ids(struct perf_session *self, bool with_hits)
+static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
{
struct rb_node *nd;
- bool ret = machine__read_build_ids(&self->host_machine, with_hits);
+ bool ret = machine__read_build_ids(&session->host_machine, with_hits);
- for (nd = rb_first(&self->machines); nd; nd = rb_next(nd)) {
+ for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__read_build_ids(pos, with_hits);
}
@@ -428,7 +352,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi
return ret;
}
-static int perf_header__adds_write(struct perf_header *self, int fd)
+static int perf_header__adds_write(struct perf_header *header,
+ struct perf_evlist *evlist, int fd)
{
int nr_sections;
struct perf_session *session;
@@ -437,13 +362,13 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
u64 sec_start;
int idx = 0, err;
- session = container_of(self, struct perf_session, header);
+ session = container_of(header, struct perf_session, header);
- if (perf_header__has_feat(self, HEADER_BUILD_ID &&
+ if (perf_header__has_feat(header, HEADER_BUILD_ID &&
!perf_session__read_build_ids(session, true)))
- perf_header__clear_feat(self, HEADER_BUILD_ID);
+ perf_header__clear_feat(header, HEADER_BUILD_ID);
- nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
return 0;
@@ -453,28 +378,28 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
sec_size = sizeof(*feat_sec) * nr_sections;
- sec_start = self->data_offset + self->data_size;
+ sec_start = header->data_offset + header->data_size;
lseek(fd, sec_start + sec_size, SEEK_SET);
- if (perf_header__has_feat(self, HEADER_TRACE_INFO)) {
+ if (perf_header__has_feat(header, HEADER_TRACE_INFO)) {
struct perf_file_section *trace_sec;
trace_sec = &feat_sec[idx++];
/* Write trace info */
trace_sec->offset = lseek(fd, 0, SEEK_CUR);
- read_tracing_data(fd, &evsel_list);
+ read_tracing_data(fd, &evlist->entries);
trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
}
- if (perf_header__has_feat(self, HEADER_BUILD_ID)) {
+ if (perf_header__has_feat(header, HEADER_BUILD_ID)) {
struct perf_file_section *buildid_sec;
buildid_sec = &feat_sec[idx++];
/* Write build-ids */
buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
- err = dsos__write_buildid_table(self, fd);
+ err = dsos__write_buildid_table(header, fd);
if (err < 0) {
pr_debug("failed to write buildid table\n");
goto out_free;
@@ -513,32 +438,41 @@ int perf_header__write_pipe(int fd)
return 0;
}
-int perf_header__write(struct perf_header *self, int fd, bool at_exit)
+int perf_session__write_header(struct perf_session *session,
+ struct perf_evlist *evlist,
+ int fd, bool at_exit)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
- struct perf_header_attr *attr;
- int i, err;
+ struct perf_header *header = &session->header;
+ struct perf_evsel *attr, *pair = NULL;
+ int err;
lseek(fd, sizeof(f_header), SEEK_SET);
- for (i = 0; i < self->attrs; i++) {
- attr = self->attr[i];
+ if (session->evlist != evlist)
+ pair = list_entry(session->evlist->entries.next, struct perf_evsel, node);
+ list_for_each_entry(attr, &evlist->entries, node) {
attr->id_offset = lseek(fd, 0, SEEK_CUR);
err = do_write(fd, attr->id, attr->ids * sizeof(u64));
if (err < 0) {
+out_err_write:
pr_debug("failed to write perf header\n");
return err;
}
+ if (session->evlist != evlist) {
+ err = do_write(fd, pair->id, pair->ids * sizeof(u64));
+ if (err < 0)
+ goto out_err_write;
+ attr->ids += pair->ids;
+ pair = list_entry(pair->node.next, struct perf_evsel, node);
+ }
}
+ header->attr_offset = lseek(fd, 0, SEEK_CUR);
- self->attr_offset = lseek(fd, 0, SEEK_CUR);
-
- for (i = 0; i < self->attrs; i++) {
- attr = self->attr[i];
-
+ list_for_each_entry(attr, &evlist->entries, node) {
f_attr = (struct perf_file_attr){
.attr = attr->attr,
.ids = {
@@ -553,20 +487,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
}
}
- self->event_offset = lseek(fd, 0, SEEK_CUR);
- self->event_size = event_count * sizeof(struct perf_trace_event_type);
+ header->event_offset = lseek(fd, 0, SEEK_CUR);
+ header->event_size = event_count * sizeof(struct perf_trace_event_type);
if (events) {
- err = do_write(fd, events, self->event_size);
+ err = do_write(fd, events, header->event_size);
if (err < 0) {
pr_debug("failed to write perf header events\n");
return err;
}
}
- self->data_offset = lseek(fd, 0, SEEK_CUR);
+ header->data_offset = lseek(fd, 0, SEEK_CUR);
if (at_exit) {
- err = perf_header__adds_write(self, fd);
+ err = perf_header__adds_write(header, evlist, fd);
if (err < 0)
return err;
}
@@ -576,20 +510,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
.size = sizeof(f_header),
.attr_size = sizeof(f_attr),
.attrs = {
- .offset = self->attr_offset,
- .size = self->attrs * sizeof(f_attr),
+ .offset = header->attr_offset,
+ .size = evlist->nr_entries * sizeof(f_attr),
},
.data = {
- .offset = self->data_offset,
- .size = self->data_size,
+ .offset = header->data_offset,
+ .size = header->data_size,
},
.event_types = {
- .offset = self->event_offset,
- .size = self->event_size,
+ .offset = header->event_offset,
+ .size = header->event_size,
},
};
- memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features));
+ memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
lseek(fd, 0, SEEK_SET);
err = do_write(fd, &f_header, sizeof(f_header));
@@ -597,26 +531,26 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
pr_debug("failed to write perf header\n");
return err;
}
- lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+ lseek(fd, header->data_offset + header->data_size, SEEK_SET);
- self->frozen = 1;
+ header->frozen = 1;
return 0;
}
-static int perf_header__getbuffer64(struct perf_header *self,
+static int perf_header__getbuffer64(struct perf_header *header,
int fd, void *buf, size_t size)
{
if (readn(fd, buf, size) <= 0)
return -1;
- if (self->needs_swap)
+ if (header->needs_swap)
mem_bswap_64(buf, size);
return 0;
}
-int perf_header__process_sections(struct perf_header *self, int fd,
- int (*process)(struct perf_file_section *self,
+int perf_header__process_sections(struct perf_header *header, int fd,
+ int (*process)(struct perf_file_section *section,
struct perf_header *ph,
int feat, int fd))
{
@@ -626,7 +560,7 @@ int perf_header__process_sections(struct perf_header *self, int fd,
int idx = 0;
int err = -1, feat = 1;
- nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
return 0;
@@ -636,17 +570,17 @@ int perf_header__process_sections(struct perf_header *self, int fd,
sec_size = sizeof(*feat_sec) * nr_sections;
- lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+ lseek(fd, header->data_offset + header->data_size, SEEK_SET);
- if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
+ if (perf_header__getbuffer64(header, fd, feat_sec, sec_size))
goto out_free;
err = 0;
while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
- if (perf_header__has_feat(self, feat)) {
+ if (perf_header__has_feat(header, feat)) {
struct perf_file_section *sec = &feat_sec[idx++];
- err = process(sec, self, feat, fd);
+ err = process(sec, header, feat, fd);
if (err < 0)
break;
}
@@ -657,35 +591,35 @@ out_free:
return err;
}
-int perf_file_header__read(struct perf_file_header *self,
+int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd)
{
lseek(fd, 0, SEEK_SET);
- if (readn(fd, self, sizeof(*self)) <= 0 ||
- memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
+ if (readn(fd, header, sizeof(*header)) <= 0 ||
+ memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
return -1;
- if (self->attr_size != sizeof(struct perf_file_attr)) {
- u64 attr_size = bswap_64(self->attr_size);
+ if (header->attr_size != sizeof(struct perf_file_attr)) {
+ u64 attr_size = bswap_64(header->attr_size);
if (attr_size != sizeof(struct perf_file_attr))
return -1;
- mem_bswap_64(self, offsetof(struct perf_file_header,
+ mem_bswap_64(header, offsetof(struct perf_file_header,
adds_features));
ph->needs_swap = true;
}
- if (self->size != sizeof(*self)) {
+ if (header->size != sizeof(*header)) {
/* Support the previous format */
- if (self->size == offsetof(typeof(*self), adds_features))
- bitmap_zero(self->adds_features, HEADER_FEAT_BITS);
+ if (header->size == offsetof(typeof(*header), adds_features))
+ bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
else
return -1;
}
- memcpy(&ph->adds_features, &self->adds_features,
+ memcpy(&ph->adds_features, &header->adds_features,
sizeof(ph->adds_features));
/*
* FIXME: hack that assumes that if we need swap the perf.data file
@@ -699,10 +633,10 @@ int perf_file_header__read(struct perf_file_header *self,
perf_header__set_feat(ph, HEADER_BUILD_ID);
}
- ph->event_offset = self->event_types.offset;
- ph->event_size = self->event_types.size;
- ph->data_offset = self->data.offset;
- ph->data_size = self->data.size;
+ ph->event_offset = header->event_types.offset;
+ ph->event_size = header->event_types.size;
+ ph->data_offset = header->data.offset;
+ ph->data_size = header->data.size;
return 0;
}
@@ -761,11 +695,10 @@ out:
return err;
}
-static int perf_header__read_build_ids(struct perf_header *self,
- int input, u64 offset, u64 size)
+static int perf_header__read_build_ids(struct perf_header *header,
+ int input, u64 offset, u64 size)
{
- struct perf_session *session = container_of(self,
- struct perf_session, header);
+ struct perf_session *session = container_of(header, struct perf_session, header);
struct build_id_event bev;
char filename[PATH_MAX];
u64 limit = offset + size;
@@ -777,7 +710,7 @@ static int perf_header__read_build_ids(struct perf_header *self,
if (read(input, &bev, sizeof(bev)) != sizeof(bev))
goto out;
- if (self->needs_swap)
+ if (header->needs_swap)
perf_event_header__bswap(&bev.header);
len = bev.header.size - sizeof(bev);
@@ -793,13 +726,13 @@ out:
return err;
}
-static int perf_file_section__process(struct perf_file_section *self,
+static int perf_file_section__process(struct perf_file_section *section,
struct perf_header *ph,
int feat, int fd)
{
- if (lseek(fd, self->offset, SEEK_SET) == (off_t)-1) {
+ if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
- "%d, continuing...\n", self->offset, feat);
+ "%d, continuing...\n", section->offset, feat);
return 0;
}
@@ -809,7 +742,7 @@ static int perf_file_section__process(struct perf_file_section *self,
break;
case HEADER_BUILD_ID:
- if (perf_header__read_build_ids(ph, fd, self->offset, self->size))
+ if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
pr_debug("Failed to read buildids, continuing...\n");
break;
default:
@@ -819,21 +752,21 @@ static int perf_file_section__process(struct perf_file_section *self,
return 0;
}
-static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
+static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
struct perf_header *ph, int fd,
bool repipe)
{
- if (readn(fd, self, sizeof(*self)) <= 0 ||
- memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
+ if (readn(fd, header, sizeof(*header)) <= 0 ||
+ memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
return -1;
- if (repipe && do_write(STDOUT_FILENO, self, sizeof(*self)) < 0)
+ if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
return -1;
- if (self->size != sizeof(*self)) {
- u64 size = bswap_64(self->size);
+ if (header->size != sizeof(*header)) {
+ u64 size = bswap_64(header->size);
- if (size != sizeof(*self))
+ if (size != sizeof(*header))
return -1;
ph->needs_swap = true;
@@ -844,10 +777,10 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
static int perf_header__read_pipe(struct perf_session *session, int fd)
{
- struct perf_header *self = &session->header;
+ struct perf_header *header = &session->header;
struct perf_pipe_file_header f_header;
- if (perf_file_header__read_pipe(&f_header, self, fd,
+ if (perf_file_header__read_pipe(&f_header, header, fd,
session->repipe) < 0) {
pr_debug("incompatible file format\n");
return -EINVAL;
@@ -858,18 +791,22 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
return 0;
}
-int perf_header__read(struct perf_session *session, int fd)
+int perf_session__read_header(struct perf_session *session, int fd)
{
- struct perf_header *self = &session->header;
+ struct perf_header *header = &session->header;
struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
int nr_attrs, nr_ids, i, j;
+ session->evlist = perf_evlist__new(NULL, NULL);
+ if (session->evlist == NULL)
+ return -ENOMEM;
+
if (session->fd_pipe)
return perf_header__read_pipe(session, fd);
- if (perf_file_header__read(&f_header, self, fd) < 0) {
+ if (perf_file_header__read(&f_header, header, fd) < 0) {
pr_debug("incompatible file format\n");
return -EINVAL;
}
@@ -878,33 +815,39 @@ int perf_header__read(struct perf_session *session, int fd)
lseek(fd, f_header.attrs.offset, SEEK_SET);
for (i = 0; i < nr_attrs; i++) {
- struct perf_header_attr *attr;
+ struct perf_evsel *evsel;
off_t tmp;
- if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr)))
+ if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr)))
goto out_errno;
tmp = lseek(fd, 0, SEEK_CUR);
+ evsel = perf_evsel__new(&f_attr.attr, i);
- attr = perf_header_attr__new(&f_attr.attr);
- if (attr == NULL)
- return -ENOMEM;
+ if (evsel == NULL)
+ goto out_delete_evlist;
+ /*
+ * Do it before so that if perf_evsel__alloc_id fails, this
+ * entry gets purged too at perf_evlist__delete().
+ */
+ perf_evlist__add(session->evlist, evsel);
nr_ids = f_attr.ids.size / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * hattr->ids threads.
+ */
+ if (perf_evsel__alloc_id(evsel, 1, nr_ids))
+ goto out_delete_evlist;
+
lseek(fd, f_attr.ids.offset, SEEK_SET);
for (j = 0; j < nr_ids; j++) {
- if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id)))
+ if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
goto out_errno;
- if (perf_header_attr__add_id(attr, f_id) < 0) {
- perf_header_attr__delete(attr);
- return -ENOMEM;
- }
- }
- if (perf_header__add_attr(self, attr) < 0) {
- perf_header_attr__delete(attr);
- return -ENOMEM;
+ perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
}
lseek(fd, tmp, SEEK_SET);
@@ -915,93 +858,63 @@ int perf_header__read(struct perf_session *session, int fd)
events = malloc(f_header.event_types.size);
if (events == NULL)
return -ENOMEM;
- if (perf_header__getbuffer64(self, fd, events,
+ if (perf_header__getbuffer64(header, fd, events,
f_header.event_types.size))
goto out_errno;
event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
}
- perf_header__process_sections(self, fd, perf_file_section__process);
+ perf_header__process_sections(header, fd, perf_file_section__process);
- lseek(fd, self->data_offset, SEEK_SET);
+ lseek(fd, header->data_offset, SEEK_SET);
- self->frozen = 1;
+ header->frozen = 1;
return 0;
out_errno:
return -errno;
+
+out_delete_evlist:
+ perf_evlist__delete(session->evlist);
+ session->evlist = NULL;
+ return -ENOMEM;
}
-u64 perf_header__sample_type(struct perf_header *header)
+u64 perf_evlist__sample_type(struct perf_evlist *evlist)
{
+ struct perf_evsel *pos;
u64 type = 0;
- int i;
-
- for (i = 0; i < header->attrs; i++) {
- struct perf_header_attr *attr = header->attr[i];
+ list_for_each_entry(pos, &evlist->entries, node) {
if (!type)
- type = attr->attr.sample_type;
- else if (type != attr->attr.sample_type)
+ type = pos->attr.sample_type;
+ else if (type != pos->attr.sample_type)
die("non matching sample_type");
}
return type;
}
-bool perf_header__sample_id_all(const struct perf_header *header)
+bool perf_evlist__sample_id_all(const struct perf_evlist *evlist)
{
bool value = false, first = true;
- int i;
-
- for (i = 0; i < header->attrs; i++) {
- struct perf_header_attr *attr = header->attr[i];
+ struct perf_evsel *pos;
+ list_for_each_entry(pos, &evlist->entries, node) {
if (first) {
- value = attr->attr.sample_id_all;
+ value = pos->attr.sample_id_all;
first = false;
- } else if (value != attr->attr.sample_id_all)
+ } else if (value != pos->attr.sample_id_all)
die("non matching sample_id_all");
}
return value;
}
-struct perf_event_attr *
-perf_header__find_attr(u64 id, struct perf_header *header)
-{
- int i;
-
- /*
- * We set id to -1 if the data file doesn't contain sample
- * ids. This can happen when the data file contains one type
- * of event and in that case, the header can still store the
- * event attribute information. Check for this and avoid
- * walking through the entire list of ids which may be large.
- */
- if (id == -1ULL) {
- if (header->attrs > 0)
- return &header->attr[0]->attr;
- return NULL;
- }
-
- for (i = 0; i < header->attrs; i++) {
- struct perf_header_attr *attr = header->attr[i];
- int j;
-
- for (j = 0; j < attr->ids; j++) {
- if (attr->id[j] == id)
- return &attr->attr;
- }
- }
-
- return NULL;
-}
-
-int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t *ev;
+ union perf_event *ev;
size_t size;
int err;
@@ -1028,17 +941,15 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
return err;
}
-int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
- struct perf_session *session)
+int perf_session__synthesize_attrs(struct perf_session *session,
+ perf_event__handler_t process)
{
- struct perf_header_attr *attr;
- int i, err = 0;
-
- for (i = 0; i < self->attrs; i++) {
- attr = self->attr[i];
+ struct perf_evsel *attr;
+ int err = 0;
- err = event__synthesize_attr(&attr->attr, attr->ids, attr->id,
- process, session);
+ list_for_each_entry(attr, &session->evlist->entries, node) {
+ err = perf_event__synthesize_attr(&attr->attr, attr->ids,
+ attr->id, process, session);
if (err) {
pr_debug("failed to create perf header attribute\n");
return err;
@@ -1048,29 +959,39 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
return err;
}
-int event__process_attr(event_t *self, struct perf_session *session)
+int perf_event__process_attr(union perf_event *event,
+ struct perf_session *session)
{
- struct perf_header_attr *attr;
unsigned int i, ids, n_ids;
+ struct perf_evsel *evsel;
+
+ if (session->evlist == NULL) {
+ session->evlist = perf_evlist__new(NULL, NULL);
+ if (session->evlist == NULL)
+ return -ENOMEM;
+ }
- attr = perf_header_attr__new(&self->attr.attr);
- if (attr == NULL)
+ evsel = perf_evsel__new(&event->attr.attr,
+ session->evlist->nr_entries);
+ if (evsel == NULL)
return -ENOMEM;
- ids = self->header.size;
- ids -= (void *)&self->attr.id - (void *)self;
+ perf_evlist__add(session->evlist, evsel);
+
+ ids = event->header.size;
+ ids -= (void *)&event->attr.id - (void *)event;
n_ids = ids / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * hattr->ids threads.
+ */
+ if (perf_evsel__alloc_id(evsel, 1, n_ids))
+ return -ENOMEM;
for (i = 0; i < n_ids; i++) {
- if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) {
- perf_header_attr__delete(attr);
- return -ENOMEM;
- }
- }
-
- if (perf_header__add_attr(&session->header, attr) < 0) {
- perf_header_attr__delete(attr);
- return -ENOMEM;
+ perf_evlist__id_add(session->evlist, evsel, 0, i,
+ event->attr.id[i]);
}
perf_session__update_sample_type(session);
@@ -1078,11 +999,11 @@ int event__process_attr(event_t *self, struct perf_session *session)
return 0;
}
-int event__synthesize_event_type(u64 event_id, char *name,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_event_type(u64 event_id, char *name,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t ev;
+ union perf_event ev;
size_t size = 0;
int err = 0;
@@ -1103,8 +1024,8 @@ int event__synthesize_event_type(u64 event_id, char *name,
return err;
}
-int event__synthesize_event_types(event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_event_types(perf_event__handler_t process,
+ struct perf_session *session)
{
struct perf_trace_event_type *type;
int i, err = 0;
@@ -1112,8 +1033,9 @@ int event__synthesize_event_types(event__handler_t process,
for (i = 0; i < event_count; i++) {
type = &events[i];
- err = event__synthesize_event_type(type->event_id, type->name,
- process, session);
+ err = perf_event__synthesize_event_type(type->event_id,
+ type->name, process,
+ session);
if (err) {
pr_debug("failed to create perf header event type\n");
return err;
@@ -1123,28 +1045,28 @@ int event__synthesize_event_types(event__handler_t process,
return err;
}
-int event__process_event_type(event_t *self,
- struct perf_session *session __unused)
+int perf_event__process_event_type(union perf_event *event,
+ struct perf_session *session __unused)
{
- if (perf_header__push_event(self->event_type.event_type.event_id,
- self->event_type.event_type.name) < 0)
+ if (perf_header__push_event(event->event_type.event_type.event_id,
+ event->event_type.event_type.name) < 0)
return -ENOMEM;
return 0;
}
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
- event__handler_t process,
+int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process,
struct perf_session *session __unused)
{
- event_t ev;
+ union perf_event ev;
ssize_t size = 0, aligned_size = 0, padding;
- int err = 0;
+ int err __used = 0;
memset(&ev, 0, sizeof(ev));
ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
- size = read_tracing_data_size(fd, pattrs);
+ size = read_tracing_data_size(fd, &evlist->entries);
if (size <= 0)
return size;
aligned_size = ALIGN(size, sizeof(u64));
@@ -1154,16 +1076,16 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
process(&ev, NULL, session);
- err = read_tracing_data(fd, pattrs);
+ err = read_tracing_data(fd, &evlist->entries);
write_padded(fd, NULL, 0, padding);
return aligned_size;
}
-int event__process_tracing_data(event_t *self,
- struct perf_session *session)
+int perf_event__process_tracing_data(union perf_event *event,
+ struct perf_session *session)
{
- ssize_t size_read, padding, size = self->tracing_data.size;
+ ssize_t size_read, padding, size = event->tracing_data.size;
off_t offset = lseek(session->fd, 0, SEEK_CUR);
char buf[BUFSIZ];
@@ -1189,12 +1111,12 @@ int event__process_tracing_data(event_t *self,
return size_read + padding;
}
-int event__synthesize_build_id(struct dso *pos, u16 misc,
- event__handler_t process,
- struct machine *machine,
- struct perf_session *session)
+int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct perf_session *session)
{
- event_t ev;
+ union perf_event ev;
size_t len;
int err = 0;
@@ -1217,11 +1139,11 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
return err;
}
-int event__process_build_id(event_t *self,
- struct perf_session *session)
+int perf_event__process_build_id(union perf_event *event,
+ struct perf_session *session)
{
- __event_process_build_id(&self->build_id,
- self->build_id.filename,
+ __event_process_build_id(&event->build_id,
+ event->build_id.filename,
session);
return 0;
}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 33f16be7b72f..456661d7f10e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -9,13 +9,6 @@
#include <linux/bitmap.h>
-struct perf_header_attr {
- struct perf_event_attr attr;
- int ids, size;
- u64 *id;
- off_t id_offset;
-};
-
enum {
HEADER_TRACE_INFO = 1,
HEADER_BUILD_ID,
@@ -46,14 +39,12 @@ struct perf_pipe_file_header {
struct perf_header;
-int perf_file_header__read(struct perf_file_header *self,
+int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd);
struct perf_header {
int frozen;
- int attrs, size;
bool needs_swap;
- struct perf_header_attr **attr;
s64 attr_offset;
u64 data_offset;
u64 data_size;
@@ -62,34 +53,25 @@ struct perf_header {
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
-int perf_header__init(struct perf_header *self);
-void perf_header__exit(struct perf_header *self);
+struct perf_evlist;
-int perf_header__read(struct perf_session *session, int fd);
-int perf_header__write(struct perf_header *self, int fd, bool at_exit);
+int perf_session__read_header(struct perf_session *session, int fd);
+int perf_session__write_header(struct perf_session *session,
+ struct perf_evlist *evlist,
+ int fd, bool at_exit);
int perf_header__write_pipe(int fd);
-int perf_header__add_attr(struct perf_header *self,
- struct perf_header_attr *attr);
-
int perf_header__push_event(u64 id, const char *name);
char *perf_header__find_event(u64 id);
-struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
-void perf_header_attr__delete(struct perf_header_attr *self);
+u64 perf_evlist__sample_type(struct perf_evlist *evlist);
+bool perf_evlist__sample_id_all(const struct perf_evlist *evlist);
+void perf_header__set_feat(struct perf_header *header, int feat);
+void perf_header__clear_feat(struct perf_header *header, int feat);
+bool perf_header__has_feat(const struct perf_header *header, int feat);
-int perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
-
-u64 perf_header__sample_type(struct perf_header *header);
-bool perf_header__sample_id_all(const struct perf_header *header);
-struct perf_event_attr *
-perf_header__find_attr(u64 id, struct perf_header *header);
-void perf_header__set_feat(struct perf_header *self, int feat);
-void perf_header__clear_feat(struct perf_header *self, int feat);
-bool perf_header__has_feat(const struct perf_header *self, int feat);
-
-int perf_header__process_sections(struct perf_header *self, int fd,
- int (*process)(struct perf_file_section *self,
+int perf_header__process_sections(struct perf_header *header, int fd,
+ int (*process)(struct perf_file_section *section,
struct perf_header *ph,
int feat, int fd));
@@ -97,32 +79,31 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms);
int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
-int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- event__handler_t process,
- struct perf_session *session);
-int event__synthesize_attrs(struct perf_header *self,
- event__handler_t process,
- struct perf_session *session);
-int event__process_attr(event_t *self, struct perf_session *session);
-
-int event__synthesize_event_type(u64 event_id, char *name,
- event__handler_t process,
- struct perf_session *session);
-int event__synthesize_event_types(event__handler_t process,
- struct perf_session *session);
-int event__process_event_type(event_t *self,
- struct perf_session *session);
-
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
- event__handler_t process,
- struct perf_session *session);
-int event__process_tracing_data(event_t *self,
+int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process,
struct perf_session *session);
+int perf_session__synthesize_attrs(struct perf_session *session,
+ perf_event__handler_t process);
+int perf_event__process_attr(union perf_event *event, struct perf_session *session);
+
+int perf_event__synthesize_event_type(u64 event_id, char *name,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_event_types(perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__process_event_type(union perf_event *event,
+ struct perf_session *session);
-int event__synthesize_build_id(struct dso *pos, u16 misc,
- event__handler_t process,
- struct machine *machine,
- struct perf_session *session);
-int event__process_build_id(event_t *self, struct perf_session *session);
-
+int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__process_tracing_data(union perf_event *event,
+ struct perf_session *session);
+
+int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct perf_session *session);
+int perf_event__process_build_id(union perf_event *event,
+ struct perf_session *session);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32f4f1f2f6e4..627a02e03c57 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,3 +1,4 @@
+#include "annotate.h"
#include "util.h"
#include "build-id.h"
#include "hist.h"
@@ -49,6 +50,15 @@ static void hists__calc_col_len(struct hists *self, struct hist_entry *h)
if (h->ms.sym)
hists__new_col_len(self, HISTC_SYMBOL, h->ms.sym->namelen);
+ else {
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+ if (hists__col_len(self, HISTC_DSO) < unresolved_col_width &&
+ !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+ !symbol_conf.dso_list)
+ hists__set_col_len(self, HISTC_DSO,
+ unresolved_col_width);
+ }
len = thread__comm_len(h->thread);
if (hists__new_col_len(self, HISTC_COMM, len))
@@ -211,7 +221,9 @@ void hist_entry__free(struct hist_entry *he)
* collapse the histogram
*/
-static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
+static bool hists__collapse_insert_entry(struct hists *self,
+ struct rb_root *root,
+ struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -226,8 +238,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
if (!cmp) {
iter->period += he->period;
- if (symbol_conf.use_callchain)
- callchain_merge(iter->callchain, he->callchain);
+ if (symbol_conf.use_callchain) {
+ callchain_cursor_reset(&self->callchain_cursor);
+ callchain_merge(&self->callchain_cursor, iter->callchain,
+ he->callchain);
+ }
hist_entry__free(he);
return false;
}
@@ -262,7 +277,7 @@ void hists__collapse_resort(struct hists *self)
next = rb_next(&n->rb_node);
rb_erase(&n->rb_node, &self->entries);
- if (collapse__insert_entry(&tmp, n))
+ if (hists__collapse_insert_entry(self, &tmp, n))
hists__inc_nr_entries(self, n);
}
@@ -425,7 +440,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
u64 cumul;
child = rb_entry(node, struct callchain_node, rb_node);
- cumul = cumul_hits(child);
+ cumul = callchain_cumul_hits(child);
remaining -= cumul;
/*
@@ -585,6 +600,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
{
struct sort_entry *se;
u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us;
+ u64 nr_events;
const char *sep = symbol_conf.field_sep;
int ret;
@@ -593,6 +609,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
if (pair_hists) {
period = self->pair ? self->pair->period : 0;
+ nr_events = self->pair ? self->pair->nr_events : 0;
total = pair_hists->stats.total_period;
period_sys = self->pair ? self->pair->period_sys : 0;
period_us = self->pair ? self->pair->period_us : 0;
@@ -600,6 +617,7 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
period_guest_us = self->pair ? self->pair->period_guest_us : 0;
} else {
period = self->period;
+ nr_events = self->nr_events;
total = session_total;
period_sys = self->period_sys;
period_us = self->period_us;
@@ -640,9 +658,9 @@ int hist_entry__snprintf(struct hist_entry *self, char *s, size_t size,
if (symbol_conf.show_nr_samples) {
if (sep)
- ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period);
+ ret += snprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events);
else
- ret += snprintf(s + ret, size - ret, "%11" PRIu64, period);
+ ret += snprintf(s + ret, size - ret, "%11" PRIu64, nr_events);
}
if (pair_hists) {
@@ -944,225 +962,14 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
}
}
-static int symbol__alloc_hist(struct symbol *self)
-{
- struct sym_priv *priv = symbol__priv(self);
- const int size = (sizeof(*priv->hist) +
- (self->end - self->start) * sizeof(u64));
-
- priv->hist = zalloc(size);
- return priv->hist == NULL ? -1 : 0;
-}
-
-int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
-{
- unsigned int sym_size, offset;
- struct symbol *sym = self->ms.sym;
- struct sym_priv *priv;
- struct sym_hist *h;
-
- if (!sym || !self->ms.map)
- return 0;
-
- priv = symbol__priv(sym);
- if (priv->hist == NULL && symbol__alloc_hist(sym) < 0)
- return -ENOMEM;
-
- sym_size = sym->end - sym->start;
- offset = ip - sym->start;
-
- pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
-
- if (offset >= sym_size)
- return 0;
-
- h = priv->hist;
- h->sum++;
- h->ip[offset]++;
-
- pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64
- "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name,
- ip, ip - self->ms.sym->start, h->ip[offset]);
- return 0;
-}
-
-static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
-{
- struct objdump_line *self = malloc(sizeof(*self) + privsize);
-
- if (self != NULL) {
- self->offset = offset;
- self->line = line;
- }
-
- return self;
-}
-
-void objdump_line__free(struct objdump_line *self)
-{
- free(self->line);
- free(self);
-}
-
-static void objdump__add_line(struct list_head *head, struct objdump_line *line)
-{
- list_add_tail(&line->node, head);
-}
-
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
- struct objdump_line *pos)
-{
- list_for_each_entry_continue(pos, head, node)
- if (pos->offset >= 0)
- return pos;
-
- return NULL;
-}
-
-static int hist_entry__parse_objdump_line(struct hist_entry *self, FILE *file,
- struct list_head *head, size_t privsize)
+int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
{
- struct symbol *sym = self->ms.sym;
- struct objdump_line *objdump_line;
- char *line = NULL, *tmp, *tmp2, *c;
- size_t line_len;
- s64 line_ip, offset = -1;
-
- if (getline(&line, &line_len, file) < 0)
- return -1;
-
- if (!line)
- return -1;
-
- while (line_len != 0 && isspace(line[line_len - 1]))
- line[--line_len] = '\0';
-
- c = strchr(line, '\n');
- if (c)
- *c = 0;
-
- line_ip = -1;
-
- /*
- * Strip leading spaces:
- */
- tmp = line;
- while (*tmp) {
- if (*tmp != ' ')
- break;
- tmp++;
- }
-
- if (*tmp) {
- /*
- * Parse hexa addresses followed by ':'
- */
- line_ip = strtoull(tmp, &tmp2, 16);
- if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
- line_ip = -1;
- }
-
- if (line_ip != -1) {
- u64 start = map__rip_2objdump(self->ms.map, sym->start),
- end = map__rip_2objdump(self->ms.map, sym->end);
-
- offset = line_ip - start;
- if (offset < 0 || (u64)line_ip > end)
- offset = -1;
- }
-
- objdump_line = objdump_line__new(offset, line, privsize);
- if (objdump_line == NULL) {
- free(line);
- return -1;
- }
- objdump__add_line(head, objdump_line);
-
- return 0;
+ return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
}
-int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
- size_t privsize)
+int hist_entry__annotate(struct hist_entry *he, size_t privsize)
{
- struct symbol *sym = self->ms.sym;
- struct map *map = self->ms.map;
- struct dso *dso = map->dso;
- char *filename = dso__build_id_filename(dso, NULL, 0);
- bool free_filename = true;
- char command[PATH_MAX * 2];
- FILE *file;
- int err = 0;
- u64 len;
- char symfs_filename[PATH_MAX];
-
- if (filename) {
- snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
- symbol_conf.symfs, filename);
- }
-
- if (filename == NULL) {
- if (dso->has_build_id) {
- pr_err("Can't annotate %s: not enough memory\n",
- sym->name);
- return -ENOMEM;
- }
- goto fallback;
- } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
- strstr(command, "[kernel.kallsyms]") ||
- access(symfs_filename, R_OK)) {
- free(filename);
-fallback:
- /*
- * If we don't have build-ids or the build-id file isn't in the
- * cache, or is just a kallsyms file, well, lets hope that this
- * DSO is the same as when 'perf record' ran.
- */
- filename = dso->long_name;
- snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
- symbol_conf.symfs, filename);
- free_filename = false;
- }
-
- if (dso->origin == DSO__ORIG_KERNEL) {
- if (dso->annotate_warned)
- goto out_free_filename;
- err = -ENOENT;
- dso->annotate_warned = 1;
- pr_err("Can't annotate %s: No vmlinux file was found in the "
- "path\n", sym->name);
- goto out_free_filename;
- }
-
- pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
- filename, sym->name, map->unmap_ip(map, sym->start),
- map->unmap_ip(map, sym->end));
-
- len = sym->end - sym->start;
-
- pr_debug("annotating [%p] %30s : [%p] %30s\n",
- dso, dso->long_name, sym, sym->name);
-
- snprintf(command, sizeof(command),
- "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
- map__rip_2objdump(map, sym->start),
- map__rip_2objdump(map, sym->end),
- symfs_filename, filename);
-
- pr_debug("Executing: %s\n", command);
-
- file = popen(command, "r");
- if (!file)
- goto out_free_filename;
-
- while (!feof(file))
- if (hist_entry__parse_objdump_line(self, file, head, privsize) < 0)
- break;
-
- pclose(file);
-out_free_filename:
- if (free_filename)
- free(filename);
- return err;
+ return symbol__annotate(he->ms.sym, he->ms.map, privsize);
}
void hists__inc_nr_events(struct hists *self, u32 type)
@@ -1177,8 +984,12 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
size_t ret = 0;
for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
- const char *name = event__get_event_name(i);
+ const char *name;
+
+ if (self->stats.nr_events[i] == 0)
+ continue;
+ name = perf_event__name(i);
if (!strcmp(name, "UNKNOWN"))
continue;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee789856a8c9..cb6858a2f9a3 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -9,33 +9,6 @@ extern struct callchain_param callchain_param;
struct hist_entry;
struct addr_location;
struct symbol;
-struct rb_root;
-
-struct objdump_line {
- struct list_head node;
- s64 offset;
- char *line;
-};
-
-void objdump_line__free(struct objdump_line *self);
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
- struct objdump_line *pos);
-
-struct sym_hist {
- u64 sum;
- u64 ip[0];
-};
-
-struct sym_ext {
- struct rb_node node;
- double percent;
- char *path;
-};
-
-struct sym_priv {
- struct sym_hist *hist;
- struct sym_ext *ext;
-};
/*
* The kernel collects the number of events it couldn't send in a stretch and
@@ -69,14 +42,13 @@ enum hist_column {
};
struct hists {
- struct rb_node rb_node;
struct rb_root entries;
u64 nr_entries;
struct events_stats stats;
- u64 config;
u64 event_stream;
- u32 type;
u16 col_len[HISTC_NR_COLS];
+ /* Best would be to reuse the session callchain cursor */
+ struct callchain_cursor callchain_cursor;
};
struct hist_entry *__hists__add_entry(struct hists *self,
@@ -102,9 +74,8 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
size_t hists__fprintf(struct hists *self, struct hists *pair,
bool show_displacement, FILE *fp);
-int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip);
-int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
- size_t privsize);
+int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
+int hist_entry__annotate(struct hist_entry *self, size_t privsize);
void hists__filter_by_dso(struct hists *self, const struct dso *dso);
void hists__filter_by_thread(struct hists *self, const struct thread *thread);
@@ -113,21 +84,18 @@ u16 hists__col_len(struct hists *self, enum hist_column col);
void hists__set_col_len(struct hists *self, enum hist_column col, u16 len);
bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
-#ifdef NO_NEWT_SUPPORT
-static inline int hists__browse(struct hists *self __used,
- const char *helpline __used,
- const char *ev_name __used)
-{
- return 0;
-}
+struct perf_evlist;
-static inline int hists__tui_browse_tree(struct rb_root *self __used,
- const char *help __used)
+#ifdef NO_NEWT_SUPPORT
+static inline
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __used,
+ const char *help __used)
{
return 0;
}
-static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
+static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
+ int evidx __used)
{
return 0;
}
@@ -135,14 +103,12 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
#define KEY_RIGHT -2
#else
#include <newt.h>
-int hists__browse(struct hists *self, const char *helpline,
- const char *ev_name);
-int hist_entry__tui_annotate(struct hist_entry *self);
+int hist_entry__tui_annotate(struct hist_entry *self, int evidx);
#define KEY_LEFT NEWT_KEY_LEFT
#define KEY_RIGHT NEWT_KEY_RIGHT
-int hists__tui_browse_tree(struct rb_root *self, const char *help);
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help);
#endif
unsigned int hists__sort_list_width(struct hists *self);
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index f5ca26e53fbb..356c7e467b83 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include "../../../../include/linux/list.h"
#ifndef PERF_LIST_H
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 135f69baf966..54a7e2634d58 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
#include "../../../include/linux/hw_breakpoint.h"
#include "util.h"
#include "../perf.h"
+#include "evlist.h"
#include "evsel.h"
#include "parse-options.h"
#include "parse-events.h"
@@ -11,10 +12,6 @@
#include "header.h"
#include "debugfs.h"
-int nr_counters;
-
-LIST_HEAD(evsel_list);
-
struct event_symbol {
u8 type;
u64 config;
@@ -271,6 +268,9 @@ const char *event_name(struct perf_evsel *evsel)
u64 config = evsel->attr.config;
int type = evsel->attr.type;
+ if (evsel->name)
+ return evsel->name;
+
return __event_name(type, config);
}
@@ -449,8 +449,8 @@ parse_single_tracepoint_event(char *sys_name,
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
- char *flags)
+parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
+ const char *evt_exp, char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -483,15 +483,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
if (len < 0)
return EVT_FAILED;
- if (parse_events(NULL, event_opt, 0))
+ if (parse_events(opt, event_opt, 0))
return EVT_FAILED;
}
return EVT_HANDLED_ALL;
}
-static enum event_result parse_tracepoint_event(const char **strp,
- struct perf_event_attr *attr)
+static enum event_result
+parse_tracepoint_event(const struct option *opt, const char **strp,
+ struct perf_event_attr *attr)
{
const char *evt_name;
char *flags = NULL, *comma_loc;
@@ -530,7 +531,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
return EVT_FAILED;
if (strpbrk(evt_name, "*?")) {
*strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
- return parse_multiple_tracepoint_event(sys_name, evt_name,
+ return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
flags);
} else {
return parse_single_tracepoint_event(sys_name, evt_name,
@@ -740,11 +741,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
* Symbolic names are (almost) exactly matched.
*/
static enum event_result
-parse_event_symbols(const char **str, struct perf_event_attr *attr)
+parse_event_symbols(const struct option *opt, const char **str,
+ struct perf_event_attr *attr)
{
enum event_result ret;
- ret = parse_tracepoint_event(str, attr);
+ ret = parse_tracepoint_event(opt, str, attr);
if (ret != EVT_FAILED)
goto modifier;
@@ -778,14 +780,17 @@ modifier:
return ret;
}
-int parse_events(const struct option *opt __used, const char *str, int unset __used)
+int parse_events(const struct option *opt, const char *str, int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_event_attr attr;
enum event_result ret;
+ const char *ostr;
for (;;) {
+ ostr = str;
memset(&attr, 0, sizeof(attr));
- ret = parse_event_symbols(&str, &attr);
+ ret = parse_event_symbols(opt, &str, &attr);
if (ret == EVT_FAILED)
return -1;
@@ -794,12 +799,15 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
if (ret != EVT_HANDLED_ALL) {
struct perf_evsel *evsel;
- evsel = perf_evsel__new(&attr,
- nr_counters);
+ evsel = perf_evsel__new(&attr, evlist->nr_entries);
if (evsel == NULL)
return -1;
- list_add_tail(&evsel->node, &evsel_list);
- ++nr_counters;
+ perf_evlist__add(evlist, evsel);
+
+ evsel->name = calloc(str - ostr + 1, 1);
+ if (!evsel->name)
+ return -1;
+ strncpy(evsel->name, ostr, str - ostr);
}
if (*str == 0)
@@ -813,13 +821,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
return 0;
}
-int parse_filter(const struct option *opt __used, const char *str,
+int parse_filter(const struct option *opt, const char *str,
int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *last = NULL;
- if (!list_empty(&evsel_list))
- last = list_entry(evsel_list.prev, struct perf_evsel, node);
+ if (evlist->nr_entries > 0)
+ last = list_entry(evlist->entries.prev, struct perf_evsel, node);
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
fprintf(stderr,
@@ -849,7 +858,7 @@ static const char * const event_type_descriptors[] = {
* Print the events from <debugfs_mount_point>/tracing/events
*/
-static void print_tracepoint_events(void)
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob)
{
DIR *sys_dir, *evt_dir;
struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent;
@@ -864,6 +873,9 @@ static void print_tracepoint_events(void)
return;
for_each_subsystem(sys_dir, sys_dirent, sys_next) {
+ if (subsys_glob != NULL &&
+ !strglobmatch(sys_dirent.d_name, subsys_glob))
+ continue;
snprintf(dir_path, MAXPATHLEN, "%s/%s", debugfs_path,
sys_dirent.d_name);
@@ -872,6 +884,10 @@ static void print_tracepoint_events(void)
continue;
for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) {
+ if (event_glob != NULL &&
+ !strglobmatch(evt_dirent.d_name, event_glob))
+ continue;
+
snprintf(evt_path, MAXPATHLEN, "%s:%s",
sys_dirent.d_name, evt_dirent.d_name);
printf(" %-42s [%s]\n", evt_path,
@@ -923,13 +939,61 @@ int is_valid_tracepoint(const char *event_string)
return 0;
}
+void print_events_type(u8 type)
+{
+ struct event_symbol *syms = event_symbols;
+ unsigned int i;
+ char name[64];
+
+ for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
+ if (type != syms->type)
+ continue;
+
+ if (strlen(syms->alias))
+ snprintf(name, sizeof(name), "%s OR %s",
+ syms->symbol, syms->alias);
+ else
+ snprintf(name, sizeof(name), "%s", syms->symbol);
+
+ printf(" %-42s [%s]\n", name,
+ event_type_descriptors[type]);
+ }
+}
+
+int print_hwcache_events(const char *event_glob)
+{
+ unsigned int type, op, i, printed = 0;
+
+ for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!is_cache_op_valid(type, op))
+ continue;
+
+ for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+ char *name = event_cache_name(type, op, i);
+
+ if (event_glob != NULL &&
+ !strglobmatch(name, event_glob))
+ continue;
+
+ printf(" %-42s [%s]\n", name,
+ event_type_descriptors[PERF_TYPE_HW_CACHE]);
+ ++printed;
+ }
+ }
+ }
+
+ return printed;
+}
+
/*
* Print the help text for the event symbols:
*/
-void print_events(void)
+void print_events(const char *event_glob)
{
struct event_symbol *syms = event_symbols;
- unsigned int i, type, op, prev_type = -1;
+ unsigned int i, type, prev_type = -1, printed = 0, ntypes_printed = 0;
char name[40];
printf("\n");
@@ -938,8 +1002,16 @@ void print_events(void)
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
type = syms->type;
- if (type != prev_type)
+ if (type != prev_type && printed) {
printf("\n");
+ printed = 0;
+ ntypes_printed++;
+ }
+
+ if (event_glob != NULL &&
+ !(strglobmatch(syms->symbol, event_glob) ||
+ (syms->alias && strglobmatch(syms->alias, event_glob))))
+ continue;
if (strlen(syms->alias))
sprintf(name, "%s OR %s", syms->symbol, syms->alias);
@@ -949,22 +1021,17 @@ void print_events(void)
event_type_descriptors[type]);
prev_type = type;
+ ++printed;
}
- printf("\n");
- for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
- /* skip invalid cache type */
- if (!is_cache_op_valid(type, op))
- continue;
-
- for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- printf(" %-42s [%s]\n",
- event_cache_name(type, op, i),
- event_type_descriptors[PERF_TYPE_HW_CACHE]);
- }
- }
+ if (ntypes_printed) {
+ printed = 0;
+ printf("\n");
}
+ print_hwcache_events(event_glob);
+
+ if (event_glob != NULL)
+ return;
printf("\n");
printf(" %-42s [%s]\n",
@@ -977,37 +1044,7 @@ void print_events(void)
event_type_descriptors[PERF_TYPE_BREAKPOINT]);
printf("\n");
- print_tracepoint_events();
+ print_tracepoint_events(NULL, NULL);
exit(129);
}
-
-int perf_evsel_list__create_default(void)
-{
- struct perf_evsel *evsel;
- struct perf_event_attr attr;
-
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_HARDWARE;
- attr.config = PERF_COUNT_HW_CPU_CYCLES;
-
- evsel = perf_evsel__new(&attr, 0);
-
- if (evsel == NULL)
- return -ENOMEM;
-
- list_add(&evsel->node, &evsel_list);
- ++nr_counters;
- return 0;
-}
-
-void perf_evsel_list__delete(void)
-{
- struct perf_evsel *pos, *n;
-
- list_for_each_entry_safe(pos, n, &evsel_list, node) {
- list_del_init(&pos->node);
- perf_evsel__delete(pos);
- }
- nr_counters = 0;
-}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 458e3ecf17af..212f88e07a9c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,11 +9,6 @@
struct list_head;
struct perf_evsel;
-extern struct list_head evsel_list;
-
-int perf_evsel_list__create_default(void);
-void perf_evsel_list__delete(void);
-
struct option;
struct tracepoint_path {
@@ -25,8 +20,6 @@ struct tracepoint_path {
extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
extern bool have_tracepoints(struct list_head *evlist);
-extern int nr_counters;
-
const char *event_name(struct perf_evsel *event);
extern const char *__event_name(int type, u64 config);
@@ -35,7 +28,10 @@ extern int parse_filter(const struct option *opt, const char *str, int unset);
#define EVENTS_HELP_MAX (128*1024)
-extern void print_events(void);
+void print_events(const char *event_glob);
+void print_events_type(u8 type);
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob);
+int print_hwcache_events(const char *event_glob);
extern int is_valid_tracepoint(const char *event_string);
extern char debugfs_path[];
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e29d9c9dccc..5ddee66020a7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -31,6 +31,7 @@
#include <string.h>
#include <stdarg.h>
#include <limits.h>
+#include <elf.h>
#undef _GNU_SOURCE
#include "util.h"
@@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
NULL);
}
-const char *kernel_get_module_path(const char *module)
+static struct map *kernel_get_module_map(const char *module)
+{
+ struct rb_node *nd;
+ struct map_groups *grp = &machine.kmaps;
+
+ if (!module)
+ module = "kernel";
+
+ for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+ if (strncmp(pos->dso->short_name + 1, module,
+ pos->dso->short_name_len - 2) == 0) {
+ return pos;
+ }
+ }
+ return NULL;
+}
+
+static struct dso *kernel_get_module_dso(const char *module)
{
struct dso *dso;
struct map *map;
@@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module)
}
}
found:
- return dso->long_name;
+ return dso;
+}
+
+const char *kernel_get_module_path(const char *module)
+{
+ struct dso *dso = kernel_get_module_dso(module);
+ return (dso) ? dso->long_name : NULL;
}
#ifdef DWARF_SUPPORT
@@ -384,7 +409,7 @@ int show_line_range(struct line_range *lr, const char *module)
setup_pager();
if (lr->function)
- fprintf(stdout, "<%s:%d>\n", lr->function,
+ fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
lr->start - lr->offset);
else
fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
@@ -426,12 +451,14 @@ end:
}
static int show_available_vars_at(int fd, struct perf_probe_event *pev,
- int max_vls, bool externs)
+ int max_vls, struct strfilter *_filter,
+ bool externs)
{
char *buf;
- int ret, i;
+ int ret, i, nvars;
struct str_node *node;
struct variable_list *vls = NULL, *vl;
+ const char *var;
buf = synthesize_perf_probe_point(&pev->point);
if (!buf)
@@ -439,36 +466,45 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
pr_debug("Searching variables at %s\n", buf);
ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
- if (ret > 0) {
- /* Some variables were found */
- fprintf(stdout, "Available variables at %s\n", buf);
- for (i = 0; i < ret; i++) {
- vl = &vls[i];
- /*
- * A probe point might be converted to
- * several trace points.
- */
- fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
- vl->point.offset);
- free(vl->point.symbol);
- if (vl->vars) {
- strlist__for_each(node, vl->vars)
+ if (ret <= 0) {
+ pr_err("Failed to find variables at %s (%d)\n", buf, ret);
+ goto end;
+ }
+ /* Some variables are found */
+ fprintf(stdout, "Available variables at %s\n", buf);
+ for (i = 0; i < ret; i++) {
+ vl = &vls[i];
+ /*
+ * A probe point might be converted to
+ * several trace points.
+ */
+ fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+ vl->point.offset);
+ free(vl->point.symbol);
+ nvars = 0;
+ if (vl->vars) {
+ strlist__for_each(node, vl->vars) {
+ var = strchr(node->s, '\t') + 1;
+ if (strfilter__compare(_filter, var)) {
fprintf(stdout, "\t\t%s\n", node->s);
- strlist__delete(vl->vars);
- } else
- fprintf(stdout, "(No variables)\n");
+ nvars++;
+ }
+ }
+ strlist__delete(vl->vars);
}
- free(vls);
- } else
- pr_err("Failed to find variables at %s (%d)\n", buf, ret);
-
+ if (nvars == 0)
+ fprintf(stdout, "\t\t(No matched variables)\n");
+ }
+ free(vls);
+end:
free(buf);
return ret;
}
/* Show available variables on given probe point */
int show_available_vars(struct perf_probe_event *pevs, int npevs,
- int max_vls, const char *module, bool externs)
+ int max_vls, const char *module,
+ struct strfilter *_filter, bool externs)
{
int i, fd, ret = 0;
@@ -485,7 +521,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
setup_pager();
for (i = 0; i < npevs && ret >= 0; i++)
- ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
+ ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
+ externs);
close(fd);
return ret;
@@ -531,7 +568,9 @@ int show_line_range(struct line_range *lr __unused, const char *module __unused)
int show_available_vars(struct perf_probe_event *pevs __unused,
int npevs __unused, int max_vls __unused,
- const char *module __unused, bool externs __unused)
+ const char *module __unused,
+ struct strfilter *filter __unused,
+ bool externs __unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
@@ -556,11 +595,11 @@ static int parse_line_num(char **ptr, int *val, const char *what)
* The line range syntax is described by:
*
* SRC[:SLN[+NUM|-ELN]]
- * FNC[:SLN[+NUM|-ELN]]
+ * FNC[@SRC][:SLN[+NUM|-ELN]]
*/
int parse_line_range_desc(const char *arg, struct line_range *lr)
{
- char *range, *name = strdup(arg);
+ char *range, *file, *name = strdup(arg);
int err;
if (!name)
@@ -610,7 +649,16 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
}
}
- if (strchr(name, '.'))
+ file = strchr(name, '@');
+ if (file) {
+ *file = '\0';
+ lr->file = strdup(++file);
+ if (lr->file == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ lr->function = name;
+ } else if (strchr(name, '.'))
lr->file = name;
else
lr->function = name;
@@ -1784,9 +1832,12 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
}
/* Loop 2: add all events */
- for (i = 0; i < npevs && ret >= 0; i++)
+ for (i = 0; i < npevs; i++) {
ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
pkgs[i].ntevs, force_add);
+ if (ret < 0)
+ break;
+ }
end:
/* Loop 3: cleanup and free trace events */
for (i = 0; i < npevs; i++) {
@@ -1912,4 +1963,46 @@ int del_perf_probe_events(struct strlist *dellist)
return ret;
}
+/* TODO: don't use a global variable for filter ... */
+static struct strfilter *available_func_filter;
+
+/*
+ * If a symbol corresponds to a function with global binding and
+ * matches filter return 0. For all others return 1.
+ */
+static int filter_available_functions(struct map *map __unused,
+ struct symbol *sym)
+{
+ if (sym->binding == STB_GLOBAL &&
+ strfilter__compare(available_func_filter, sym->name))
+ return 0;
+ return 1;
+}
+
+int show_available_funcs(const char *module, struct strfilter *_filter)
+{
+ struct map *map;
+ int ret;
+
+ setup_pager();
+
+ ret = init_vmlinux();
+ if (ret < 0)
+ return ret;
+ map = kernel_get_module_map(module);
+ if (!map) {
+ pr_err("Failed to find %s map.\n", (module) ? : "kernel");
+ return -EINVAL;
+ }
+ available_func_filter = _filter;
+ if (map__load(map, filter_available_functions)) {
+ pr_err("Failed to load map.\n");
+ return -EINVAL;
+ }
+ if (!dso__sorted_by_name(map->dso, map->type))
+ dso__sort_by_name(map->dso, map->type);
+
+ dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5accbedfea37..3434fc9d79d5 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include "strlist.h"
+#include "strfilter.h"
extern bool probe_event_dry_run;
@@ -126,7 +127,8 @@ extern int show_perf_probe_events(void);
extern int show_line_range(struct line_range *lr, const char *module);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_probe_points, const char *module,
- bool externs);
+ struct strfilter *filter, bool externs);
+extern int show_available_funcs(const char *module, struct strfilter *filter);
/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ab83b6ac5d65..194f9e2a3285 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -33,6 +33,7 @@
#include <ctype.h>
#include <dwarf-regs.h>
+#include <linux/bitops.h>
#include "event.h"
#include "debug.h"
#include "util.h"
@@ -280,6 +281,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
return name ? (strcmp(tname, name) == 0) : false;
}
+/* Get callsite line number of inline-function instance */
+static int die_get_call_lineno(Dwarf_Die *in_die)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Word ret;
+
+ if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+ return -ENOENT;
+
+ dwarf_formudata(&attr, &ret);
+ return (int)ret;
+}
+
/* Get type die */
static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
@@ -320,13 +334,23 @@ static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
return vr_die;
}
-static bool die_is_signed_type(Dwarf_Die *tp_die)
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ Dwarf_Word *result)
{
Dwarf_Attribute attr;
+
+ if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+static bool die_is_signed_type(Dwarf_Die *tp_die)
+{
Dwarf_Word ret;
- if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL ||
- dwarf_formudata(&attr, &ret) != 0)
+ if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
return false;
return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
@@ -335,11 +359,29 @@ static bool die_is_signed_type(Dwarf_Die *tp_die)
static int die_get_byte_size(Dwarf_Die *tp_die)
{
- Dwarf_Attribute attr;
Dwarf_Word ret;
- if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL ||
- dwarf_formudata(&attr, &ret) != 0)
+ if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
+ return 0;
+
+ return (int)ret;
+}
+
+static int die_get_bit_size(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
+ return 0;
+
+ return (int)ret;
+}
+
+static int die_get_bit_offset(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
return 0;
return (int)ret;
@@ -458,6 +500,151 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
}
+/* Walker on lines (Note: line number will not be sorted) */
+typedef int (* line_walk_handler_t) (const char *fname, int lineno,
+ Dwarf_Addr addr, void *data);
+
+struct __line_walk_param {
+ const char *fname;
+ line_walk_handler_t handler;
+ void *data;
+ int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+ Dwarf_Addr addr;
+ int lineno;
+
+ if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+ lineno = die_get_call_lineno(in_die);
+ if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
+ lw->retval = lw->handler(lw->fname, lineno, addr,
+ lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_FOUND;
+ }
+ }
+ return DIE_FIND_CB_SIBLING;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die,
+ line_walk_handler_t handler, void *data)
+{
+ struct __line_walk_param lw = {
+ .handler = handler,
+ .data = data,
+ .retval = 0,
+ };
+ Dwarf_Die die_mem;
+ Dwarf_Addr addr;
+ int lineno;
+
+ /* Handle function declaration line */
+ lw.fname = dwarf_decl_file(sp_die);
+ if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+ dwarf_entrypc(sp_die, &addr) == 0) {
+ lw.retval = handler(lw.fname, lineno, addr, data);
+ if (lw.retval != 0)
+ goto done;
+ }
+ die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+ return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+
+ lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
+ if (lw->retval != 0)
+ return DWARF_CB_ABORT;
+
+ return DWARF_CB_OK;
+}
+
+/*
+ * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
+ * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
+ */
+static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
+ void *data)
+{
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ const char *fname;
+ int lineno, ret = 0;
+ Dwarf_Die die_mem, *cu_die;
+ size_t nlines, i;
+
+ /* Get the CU die */
+ if (dwarf_tag(pdie) == DW_TAG_subprogram)
+ cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
+ else
+ cu_die = pdie;
+ if (!cu_die) {
+ pr_debug2("Failed to get CU from subprogram\n");
+ return -EINVAL;
+ }
+
+ /* Get lines list in the CU */
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+ pr_debug2("Failed to get source lines on this CU.\n");
+ return -ENOENT;
+ }
+ pr_debug2("Get %zd lines from this CU\n", nlines);
+
+ /* Walk on the lines on lines list */
+ for (i = 0; i < nlines; i++) {
+ line = dwarf_onesrcline(lines, i);
+ if (line == NULL ||
+ dwarf_lineno(line, &lineno) != 0 ||
+ dwarf_lineaddr(line, &addr) != 0) {
+ pr_debug2("Failed to get line info. "
+ "Possible error in debuginfo.\n");
+ continue;
+ }
+ /* Filter lines based on address */
+ if (pdie != cu_die)
+ /*
+ * Address filtering
+ * The line is included in given function, and
+ * no inline block includes it.
+ */
+ if (!dwarf_haspc(pdie, addr) ||
+ die_find_inlinefunc(pdie, addr, &die_mem))
+ continue;
+ /* Get source line */
+ fname = dwarf_linesrc(line, NULL, NULL);
+
+ ret = handler(fname, lineno, addr, data);
+ if (ret != 0)
+ return ret;
+ }
+
+ /*
+ * Dwarf lines doesn't include function declarations and inlined
+ * subroutines. We have to check functions list or given function.
+ */
+ if (pdie != cu_die)
+ ret = __die_walk_funclines(pdie, handler, data);
+ else {
+ struct __line_walk_param param = {
+ .handler = handler,
+ .data = data,
+ .retval = 0,
+ };
+ dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+ ret = param.retval;
+ }
+
+ return ret;
+}
+
struct __find_variable_param {
const char *name;
Dwarf_Addr addr;
@@ -669,6 +856,8 @@ static_var:
return 0;
}
+#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
+
static int convert_variable_type(Dwarf_Die *vr_die,
struct probe_trace_arg *tvar,
const char *cast)
@@ -685,6 +874,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
return (tvar->type == NULL) ? -ENOMEM : 0;
}
+ if (die_get_bit_size(vr_die) != 0) {
+ /* This is a bitfield */
+ ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die),
+ die_get_bit_offset(vr_die),
+ BYTES_TO_BITS(die_get_byte_size(vr_die)));
+ goto formatted;
+ }
+
if (die_get_real_type(vr_die, &type) == NULL) {
pr_warning("Failed to get a type information of %s.\n",
dwarf_diename(vr_die));
@@ -729,29 +926,31 @@ static int convert_variable_type(Dwarf_Die *vr_die,
return (tvar->type == NULL) ? -ENOMEM : 0;
}
- ret = die_get_byte_size(&type) * 8;
- if (ret) {
- /* Check the bitwidth */
- if (ret > MAX_BASIC_TYPE_BITS) {
- pr_info("%s exceeds max-bitwidth."
- " Cut down to %d bits.\n",
- dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
- ret = MAX_BASIC_TYPE_BITS;
- }
+ ret = BYTES_TO_BITS(die_get_byte_size(&type));
+ if (!ret)
+ /* No size ... try to use default type */
+ return 0;
- ret = snprintf(buf, 16, "%c%d",
- die_is_signed_type(&type) ? 's' : 'u', ret);
- if (ret < 0 || ret >= 16) {
- if (ret >= 16)
- ret = -E2BIG;
- pr_warning("Failed to convert variable type: %s\n",
- strerror(-ret));
- return ret;
- }
- tvar->type = strdup(buf);
- if (tvar->type == NULL)
- return -ENOMEM;
+ /* Check the bitwidth */
+ if (ret > MAX_BASIC_TYPE_BITS) {
+ pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+ dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+ ret = MAX_BASIC_TYPE_BITS;
+ }
+ ret = snprintf(buf, 16, "%c%d",
+ die_is_signed_type(&type) ? 's' : 'u', ret);
+
+formatted:
+ if (ret < 0 || ret >= 16) {
+ if (ret >= 16)
+ ret = -E2BIG;
+ pr_warning("Failed to convert variable type: %s\n",
+ strerror(-ret));
+ return ret;
}
+ tvar->type = strdup(buf);
+ if (tvar->type == NULL)
+ return -ENOMEM;
return 0;
}
@@ -1050,157 +1249,102 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
return ret;
}
-/* Find probe point from its line number */
-static int find_probe_point_by_line(struct probe_finder *pf)
+static int probe_point_line_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno;
- int ret = 0;
-
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
+ struct probe_finder *pf = data;
+ int ret;
- for (i = 0; i < nlines && ret == 0; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- lineno != pf->lno)
- continue;
+ if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+ return 0;
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
+ pf->addr = addr;
+ ret = call_probe_finder(NULL, pf);
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_warning("Failed to get the address of the line.\n");
- return -ENOENT;
- }
- pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
- (int)i, lineno, (uintmax_t)addr);
- pf->addr = addr;
+ /* Continue if no error, because the line will be in inline function */
+ return ret < 0 ? ret : 0;
+}
- ret = call_probe_finder(NULL, pf);
- /* Continuing, because target line might be inlined. */
- }
- return ret;
+/* Find probe point from its line number */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+ return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
}
/* Find lines which match lazy pattern */
static int find_lazy_match_lines(struct list_head *head,
const char *fname, const char *pat)
{
- char *fbuf, *p1, *p2;
- int fd, line, nlines = -1;
- struct stat st;
-
- fd = open(fname, O_RDONLY);
- if (fd < 0) {
- pr_warning("Failed to open %s: %s\n", fname, strerror(-fd));
+ FILE *fp;
+ char *line = NULL;
+ size_t line_len;
+ ssize_t len;
+ int count = 0, linenum = 1;
+
+ fp = fopen(fname, "r");
+ if (!fp) {
+ pr_warning("Failed to open %s: %s\n", fname, strerror(errno));
return -errno;
}
- if (fstat(fd, &st) < 0) {
- pr_warning("Failed to get the size of %s: %s\n",
- fname, strerror(errno));
- nlines = -errno;
- goto out_close;
- }
-
- nlines = -ENOMEM;
- fbuf = malloc(st.st_size + 2);
- if (fbuf == NULL)
- goto out_close;
- if (read(fd, fbuf, st.st_size) < 0) {
- pr_warning("Failed to read %s: %s\n", fname, strerror(errno));
- nlines = -errno;
- goto out_free_fbuf;
- }
- fbuf[st.st_size] = '\n'; /* Dummy line */
- fbuf[st.st_size + 1] = '\0';
- p1 = fbuf;
- line = 1;
- nlines = 0;
- while ((p2 = strchr(p1, '\n')) != NULL) {
- *p2 = '\0';
- if (strlazymatch(p1, pat)) {
- line_list__add_line(head, line);
- nlines++;
+ while ((len = getline(&line, &line_len, fp)) > 0) {
+
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+
+ if (strlazymatch(line, pat)) {
+ line_list__add_line(head, linenum);
+ count++;
}
- line++;
- p1 = p2 + 1;
+ linenum++;
}
-out_free_fbuf:
- free(fbuf);
-out_close:
- close(fd);
- return nlines;
+
+ if (ferror(fp))
+ count = -errno;
+ free(line);
+ fclose(fp);
+
+ if (count == 0)
+ pr_debug("No matched lines found in %s.\n", fname);
+ return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ int ret;
+
+ if (!line_list__has_line(&pf->lcache, lineno) ||
+ strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ pr_debug("Probe line found: line:%d addr:0x%llx\n",
+ lineno, (unsigned long long)addr);
+ pf->addr = addr;
+ ret = call_probe_finder(NULL, pf);
+
+ /*
+ * Continue if no error, because the lazy pattern will match
+ * to other lines
+ */
+ return ret < 0 ? ret : 0;
}
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- Dwarf_Die die_mem;
- int lineno;
int ret = 0;
if (list_empty(&pf->lcache)) {
/* Matching lazy line pattern */
ret = find_lazy_match_lines(&pf->lcache, pf->fname,
pf->pev->point.lazy_line);
- if (ret == 0) {
- pr_debug("No matched lines found in %s.\n", pf->fname);
- return 0;
- } else if (ret < 0)
+ if (ret <= 0)
return ret;
}
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- for (i = 0; i < nlines && ret >= 0; i++) {
- line = dwarf_onesrcline(lines, i);
-
- if (dwarf_lineno(line, &lineno) != 0 ||
- !line_list__has_line(&pf->lcache, lineno))
- continue;
-
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
-
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_debug("Failed to get the address of line %d.\n",
- lineno);
- continue;
- }
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (!dwarf_haspc(sp_die, addr))
- continue;
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
- (int)i, lineno, (unsigned long long)addr);
- pf->addr = addr;
-
- ret = call_probe_finder(sp_die, pf);
- /* Continuing, because target line might be inlined. */
- }
- /* TODO: deallocate lines, but how? */
- return ret;
+ return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
}
/* Callback parameter with return value */
@@ -1318,8 +1462,7 @@ static int find_probes(int fd, struct probe_finder *pf)
off = 0;
line_list__init(&pf->lcache);
/* Loop on CUs (Compilation Unit) */
- while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
- ret >= 0) {
+ while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
/* Get the DIE(Debugging Information Entry) of this CU */
diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die);
if (!diep)
@@ -1340,6 +1483,8 @@ static int find_probes(int fd, struct probe_finder *pf)
pf->lno = pp->line;
ret = find_probe_point_by_line(pf);
}
+ if (ret < 0)
+ break;
}
off = noff;
}
@@ -1644,91 +1789,28 @@ static int line_range_add_line(const char *src, unsigned int lineno,
return line_list__add_line(&lr->line_list, lineno);
}
-/* Search function declaration lines */
-static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data)
+static int line_range_walk_cb(const char *fname, int lineno,
+ Dwarf_Addr addr __used,
+ void *data)
{
- struct dwarf_callback_param *param = data;
- struct line_finder *lf = param->data;
- const char *src;
- int lineno;
+ struct line_finder *lf = data;
- src = dwarf_decl_file(sp_die);
- if (src && strtailcmp(src, lf->fname) != 0)
- return DWARF_CB_OK;
-
- if (dwarf_decl_line(sp_die, &lineno) != 0 ||
+ if ((strtailcmp(fname, lf->fname) != 0) ||
(lf->lno_s > lineno || lf->lno_e < lineno))
- return DWARF_CB_OK;
+ return 0;
- param->retval = line_range_add_line(src, lineno, lf->lr);
- if (param->retval < 0)
- return DWARF_CB_ABORT;
- return DWARF_CB_OK;
-}
+ if (line_range_add_line(fname, lineno, lf->lr) < 0)
+ return -EINVAL;
-static int find_line_range_func_decl_lines(struct line_finder *lf)
-{
- struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
- dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
- return param.retval;
+ return 0;
}
/* Find line range from its line number */
static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno, ret = 0;
- const char *src;
- Dwarf_Die die_mem;
-
- line_list__init(&lf->lr->line_list);
- if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- /* Search probable lines on lines list */
- for (i = 0; i < nlines; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- (lf->lno_s > lineno || lf->lno_e < lineno))
- continue;
-
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (dwarf_lineaddr(line, &addr) != 0 ||
- !dwarf_haspc(sp_die, addr))
- continue;
-
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- /* TODO: Get fileno from line, but how? */
- src = dwarf_linesrc(line, NULL, NULL);
- if (strtailcmp(src, lf->fname) != 0)
- continue;
-
- ret = line_range_add_line(src, lineno, lf->lr);
- if (ret < 0)
- return ret;
- }
+ int ret;
- /*
- * Dwarf lines doesn't include function declarations. We have to
- * check functions list or given function.
- */
- if (sp_die) {
- src = dwarf_decl_file(sp_die);
- if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
- (lf->lno_s <= lineno && lf->lno_e >= lineno))
- ret = line_range_add_line(src, lineno, lf->lr);
- } else
- ret = find_line_range_func_decl_lines(lf);
+ ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
/* Update status */
if (ret >= 0)
@@ -1758,9 +1840,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
struct line_finder *lf = param->data;
struct line_range *lr = lf->lr;
- pr_debug("find (%llx) %s\n",
- (unsigned long long)dwarf_dieoffset(sp_die),
- dwarf_diename(sp_die));
if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000000..a9f2d7e1204d
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,896 @@
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "event.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+/* Define PyVarObject_HEAD_INIT for python 2.5 */
+#ifndef PyVarObject_HEAD_INIT
+# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+/*
+ * Record layout used for the throttle/unthrottle Python event type:
+ * the common event header followed by three 64-bit fields.
+ */
+struct throttle_event {
+ struct perf_event_header header;
+ u64 time;
+ u64 id;
+ u64 stream_id;
+};
+
+PyMODINIT_FUNC initperf(void);
+
+/*
+ * Build one PyMemberDef initializer named after 'member'. The offset is
+ * that of 'member' inside 'struct type', taken relative to the 'event'
+ * field of struct pyrf_event; the flags slot is 0.
+ */
+#define member_def(type, member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+ 0, help }
+
+/*
+ * Build one PyMemberDef initializer called 'name' that exposes 'member'
+ * of the struct perf_sample stored in the 'sample' field of
+ * struct pyrf_event; the flags slot is 0.
+ */
+#define sample_member_def(name, member, ptype, help) \
+ { #name, ptype, \
+ offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+ 0, help }
+
+/*
+ * Python object backing every perf.*_event type: the parsed sample
+ * fields plus a copy of the raw event record.
+ */
+struct pyrf_event {
+ PyObject_HEAD
+ struct perf_sample sample;
+ union perf_event event;
+};
+
+/*
+ * PyMemberDef entries for the parsed sample fields, shared by all event
+ * types. The expansion ends with a trailing comma so that it can be
+ * followed directly by the event specific entries.
+ *
+ * The help text of sample_ip used to read "event type" (copy/paste slip);
+ * it now describes the instruction pointer it actually exposes.
+ */
+#define sample_members \
+	sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \
+	sample_member_def(sample_pid, pid, T_INT, "event pid"), \
+	sample_member_def(sample_tid, tid, T_INT, "event tid"), \
+	sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
+	sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
+	sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
+	sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+	sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
+	sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+/* Docstring and attribute table of the perf.mmap_event Python type. */
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+/* Sample fields first, then the header type and the mmap record fields. */
+static PyMemberDef pyrf_mmap_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(mmap_event, pid, T_UINT, "event pid"),
+ member_def(mmap_event, tid, T_UINT, "event tid"),
+ member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
+ member_def(mmap_event, len, T_ULONGLONG, "map length"),
+ member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
+ member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
+ { .name = NULL, },
+};
+
+/*
+ * repr() of a perf.mmap_event: formats the mmap record into a heap
+ * string and hands a Python string copy of it back to the caller.
+ * Raises MemoryError when the formatting buffer cannot be allocated.
+ */
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *repr;
+	char *text;
+	int len;
+
+	len = asprintf(&text, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
+			      "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
+			      "filename: %s }",
+		       pevent->event.mmap.pid, pevent->event.mmap.tid,
+		       pevent->event.mmap.start, pevent->event.mmap.len,
+		       pevent->event.mmap.pgoff, pevent->event.mmap.filename);
+	if (len < 0)
+		return PyErr_NoMemory();
+
+	/* The Python string owns its own copy, so the C buffer can go. */
+	repr = PyString_FromString(text);
+	free(text);
+	return repr;
+}
+
+/*
+ * Type object for perf.mmap_event; instances are struct pyrf_event.
+ * tp_new is filled in by pyrf_event__setup_types().
+ */
+static PyTypeObject pyrf_mmap_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.mmap_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_mmap_event__doc,
+ .tp_members = pyrf_mmap_event__members,
+ .tp_repr = (reprfunc)pyrf_mmap_event__repr,
+};
+
+/* Docstring and attribute table of the perf.task_event Python type. */
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+/* Sample fields first, then the header type and the fork record fields. */
+static PyMemberDef pyrf_task_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(fork_event, pid, T_UINT, "event pid"),
+ member_def(fork_event, ppid, T_UINT, "event ppid"),
+ member_def(fork_event, tid, T_UINT, "event tid"),
+ member_def(fork_event, ptid, T_UINT, "event ptid"),
+ member_def(fork_event, time, T_ULONGLONG, "timestamp"),
+ { .name = NULL, },
+};
+
+/*
+ * repr() of a perf.task_event. The record is labelled "fork" when the
+ * header type is PERF_RECORD_FORK and "exit" for anything else.
+ */
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+	const char *kind = "exit";
+
+	if (pevent->event.header.type == PERF_RECORD_FORK)
+		kind = "fork";
+
+	return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+				   "ptid: %u, time: %" PRIu64 "}",
+				   kind,
+				   pevent->event.fork.pid,
+				   pevent->event.fork.ppid,
+				   pevent->event.fork.tid,
+				   pevent->event.fork.ptid,
+				   pevent->event.fork.time);
+}
+
+/*
+ * Type object for perf.task_event; instances are struct pyrf_event.
+ * tp_new is filled in by pyrf_event__setup_types().
+ */
+static PyTypeObject pyrf_task_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.task_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_task_event__doc,
+ .tp_members = pyrf_task_event__members,
+ .tp_repr = (reprfunc)pyrf_task_event__repr,
+};
+
+/* Docstring and attribute table of the perf.comm_event Python type. */
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+/* Sample fields first, then the header type and the comm record fields. */
+static PyMemberDef pyrf_comm_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(comm_event, pid, T_UINT, "event pid"),
+ member_def(comm_event, tid, T_UINT, "event tid"),
+ member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
+ { .name = NULL, },
+};
+
+/* repr() of a perf.comm_event: pid, tid and the command name. */
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+	PyObject *repr;
+
+	repr = PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+				   pevent->event.comm.pid,
+				   pevent->event.comm.tid,
+				   pevent->event.comm.comm);
+	return repr;
+}
+
+/*
+ * Type object for perf.comm_event; instances are struct pyrf_event.
+ * tp_new is filled in by pyrf_event__setup_types().
+ */
+static PyTypeObject pyrf_comm_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.comm_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_comm_event__doc,
+ .tp_members = pyrf_comm_event__members,
+ .tp_repr = (reprfunc)pyrf_comm_event__repr,
+};
+
+/* Docstring and attribute table of the perf.throttle_event Python type. */
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+/*
+ * Sample fields first, then the header type and the fields of
+ * struct throttle_event, addressed from the start of the stored event.
+ */
+static PyMemberDef pyrf_throttle_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
+ member_def(throttle_event, id, T_ULONGLONG, "event id"),
+ member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
+ { .name = NULL, },
+};
+
+/*
+ * repr() of a perf.throttle_event. The record is labelled "throttle"
+ * when the header type is PERF_RECORD_THROTTLE and "unthrottle"
+ * otherwise.
+ *
+ * struct throttle_event already starts with the event header, so it has
+ * to overlay the stored event from its first byte. The former
+ * "&header + 1" base skipped the header a second time and shifted every
+ * field by one u64, disagreeing with the offsets used by
+ * pyrf_throttle_event__members.
+ */
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+	struct throttle_event *te = (struct throttle_event *)&pevent->event;
+
+	return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
+				   ", stream_id: %" PRIu64 " }",
+				   pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+				   te->time, te->id, te->stream_id);
+}
+
+/*
+ * Type object for perf.throttle_event; instances are struct pyrf_event.
+ * tp_new is filled in by pyrf_event__setup_types().
+ */
+static PyTypeObject pyrf_throttle_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.throttle_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_throttle_event__doc,
+ .tp_members = pyrf_throttle_event__members,
+ .tp_repr = (reprfunc)pyrf_throttle_event__repr,
+};
+
+/*
+ * Install the generic allocator on all four event types, then finalize
+ * them with PyType_Ready() in the order mmap, task, comm, throttle.
+ * Stops at the first failure and returns the last PyType_Ready() result.
+ */
+static int pyrf_event__setup_types(void)
+{
+	PyTypeObject *types[] = {
+		&pyrf_mmap_event__type,
+		&pyrf_task_event__type,
+		&pyrf_comm_event__type,
+		&pyrf_throttle_event__type,
+	};
+	const size_t nr_types = sizeof(types) / sizeof(types[0]);
+	size_t n;
+	int err = 0;
+
+	for (n = 0; n < nr_types; n++)
+		types[n]->tp_new = PyType_GenericNew;
+
+	for (n = 0; n < nr_types; n++) {
+		err = PyType_Ready(types[n]);
+		if (err < 0)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Python type used for each PERF_RECORD_* value, indexed by record type.
+ * NOTE(review): LOST, READ and SAMPLE records are mapped to the mmap
+ * event type, so they are presented with mmap attributes and repr -
+ * looks like a placeholder until dedicated types exist; confirm.
+ */
+static PyTypeObject *pyrf_event__type[] = {
+ [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
+ [PERF_RECORD_COMM] = &pyrf_comm_event__type,
+ [PERF_RECORD_EXIT] = &pyrf_task_event__type,
+ [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_FORK] = &pyrf_task_event__type,
+ [PERF_RECORD_READ] = &pyrf_mmap_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
+};
+
+/*
+ * Wrap a raw perf event record in a new Python event object of the type
+ * registered for its record type in pyrf_event__type[].
+ *
+ * Returns a new reference, or NULL with a Python exception set:
+ *  - TypeError when the record type is outside MMAP..SAMPLE,
+ *  - ValueError when the record does not fit in the object's event copy,
+ *  - MemoryError (from PyObject_New) when allocation fails.
+ *
+ * Only the 'event' field is filled in here; 'sample' is left for the
+ * caller to populate.
+ */
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+	struct pyrf_event *pevent;
+	PyTypeObject *ptype;
+
+	if (event->header.type < PERF_RECORD_MMAP ||
+	    event->header.type > PERF_RECORD_SAMPLE) {
+		/* A NULL return must always come with an exception. */
+		PyErr_Format(PyExc_TypeError,
+			     "perf: unsupported event type %u",
+			     (unsigned int)event->header.type);
+		return NULL;
+	}
+
+	/*
+	 * The object only reserves sizeof(union perf_event) bytes, while
+	 * header.size is taken from the record itself: refuse anything
+	 * larger instead of writing past the end of the allocation.
+	 */
+	if (event->header.size > sizeof(pevent->event)) {
+		PyErr_Format(PyExc_ValueError,
+			     "perf: event of %u bytes is too large",
+			     (unsigned int)event->header.size);
+		return NULL;
+	}
+
+	ptype = pyrf_event__type[event->header.type];
+	pevent = PyObject_New(struct pyrf_event, ptype);
+	if (pevent != NULL)
+		memcpy(&pevent->event, event, event->header.size);
+	return (PyObject *)pevent;
+}
+
+/* Python object wrapping a cpu_map created in pyrf_cpu_map__init(). */
+struct pyrf_cpu_map {
+ PyObject_HEAD
+
+ struct cpu_map *cpus;
+};
+
+/*
+ * perf.cpu_map.__init__(cpustr=None): build the wrapped cpu map from the
+ * optional 'cpustr' argument (NULL is handed to cpu_map__new() when it is
+ * omitted).
+ *
+ * Returns 0 on success and -1 with a Python exception set on failure.
+ */
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+			      PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "cpustr", NULL, NULL, };
+	char *cpustr = NULL;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+					 kwlist, &cpustr))
+		return -1;
+
+	pcpus->cpus = cpu_map__new(cpustr);
+	if (pcpus->cpus == NULL) {
+		/*
+		 * tp_init must not report failure without an exception, or
+		 * the interpreter raises an unrelated SystemError. The
+		 * reason for the failure is not visible from here.
+		 */
+		if (!PyErr_Occurred())
+			PyErr_SetString(PyExc_RuntimeError,
+					"perf: could not create cpu map");
+		return -1;
+	}
+	return 0;
+}
+
+/* tp_dealloc: release the wrapped cpu map, then the Python object itself. */
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+ cpu_map__delete(pcpus->cpus);
+ pcpus->ob_type->tp_free((PyObject*)pcpus);
+}
+
+/* sq_length: number of entries in the wrapped cpu map. */
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+	return ((struct pyrf_cpu_map *)obj)->cpus->nr;
+}
+
+/*
+ * sq_item: return entry 'i' of the wrapped cpu map as a Python int.
+ *
+ * Raises IndexError for an out-of-range index. The sequence iteration
+ * protocol relies on that exception to stop, and returning NULL without
+ * one makes the interpreter raise SystemError instead. Indexes that are
+ * still negative after the interpreter's adjustment are rejected too.
+ */
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_cpu_map *pcpus = (void *)obj;
+
+	if (i < 0 || i >= pcpus->cpus->nr) {
+		PyErr_SetString(PyExc_IndexError, "cpu_map index out of range");
+		return NULL;
+	}
+
+	return Py_BuildValue("i", pcpus->cpus->map[i]);
+}
+
+/* Sequence protocol of perf.cpu_map: len() and indexing only. */
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+ .sq_length = pyrf_cpu_map__length,
+ .sq_item = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+/*
+ * Type object for perf.cpu_map; tp_new is filled in by
+ * pyrf_cpu_map__setup_types().
+ */
+static PyTypeObject pyrf_cpu_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.cpu_map",
+ .tp_basicsize = sizeof(struct pyrf_cpu_map),
+ .tp_dealloc = (destructor)pyrf_cpu_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_cpu_map__doc,
+ .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
+ .tp_init = (initproc)pyrf_cpu_map__init,
+};
+
+/*
+ * Install the generic allocator on perf.cpu_map and finalize the type.
+ * Returns the PyType_Ready() result.
+ */
+static int pyrf_cpu_map__setup_types(void)
+{
+ pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+/* Python object wrapping a thread_map created in pyrf_thread_map__init(). */
+struct pyrf_thread_map {
+ PyObject_HEAD
+
+ struct thread_map *threads;
+};
+
+/*
+ * perf.thread_map.__init__(pid=-1, tid=-1): build the wrapped thread map
+ * from the optional 'pid' and 'tid' arguments.
+ *
+ * Returns 0 on success and -1 with a Python exception set on failure.
+ */
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+				 PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = { "pid", "tid", NULL, NULL, };
+	int pid = -1, tid = -1;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
+					 kwlist, &pid, &tid))
+		return -1;
+
+	pthreads->threads = thread_map__new(pid, tid);
+	if (pthreads->threads == NULL) {
+		/*
+		 * tp_init must not report failure without an exception, or
+		 * the interpreter raises an unrelated SystemError. The
+		 * reason for the failure is not visible from here.
+		 */
+		if (!PyErr_Occurred())
+			PyErr_SetString(PyExc_RuntimeError,
+					"perf: could not create thread map");
+		return -1;
+	}
+	return 0;
+}
+
+/* tp_dealloc: release the wrapped thread map, then the Python object itself. */
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+ thread_map__delete(pthreads->threads);
+ pthreads->ob_type->tp_free((PyObject*)pthreads);
+}
+
+/* sq_length: number of entries in the wrapped thread map. */
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+	return ((struct pyrf_thread_map *)obj)->threads->nr;
+}
+
+/*
+ * sq_item: return entry 'i' of the wrapped thread map as a Python int.
+ *
+ * Raises IndexError for an out-of-range index. The sequence iteration
+ * protocol relies on that exception to stop, and returning NULL without
+ * one makes the interpreter raise SystemError instead. Indexes that are
+ * still negative after the interpreter's adjustment are rejected too.
+ */
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_thread_map *pthreads = (void *)obj;
+
+	if (i < 0 || i >= pthreads->threads->nr) {
+		PyErr_SetString(PyExc_IndexError, "thread_map index out of range");
+		return NULL;
+	}
+
+	return Py_BuildValue("i", pthreads->threads->map[i]);
+}
+
+/* Sequence protocol of perf.thread_map: len() and indexing only. */
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+ .sq_length = pyrf_thread_map__length,
+ .sq_item = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+/*
+ * Type object for perf.thread_map; tp_new is filled in by
+ * pyrf_thread_map__setup_types().
+ */
+static PyTypeObject pyrf_thread_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.thread_map",
+ .tp_basicsize = sizeof(struct pyrf_thread_map),
+ .tp_dealloc = (destructor)pyrf_thread_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_thread_map__doc,
+ .tp_as_sequence = &pyrf_thread_map__sequence_methods,
+ .tp_init = (initproc)pyrf_thread_map__init,
+};
+
+/*
+ * Install the generic allocator on perf.thread_map and finalize the type.
+ * Returns the PyType_Ready() result.
+ */
+static int pyrf_thread_map__setup_types(void)
+{
+ pyrf_thread_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_thread_map__type);
+}
+
+/* Python object embedding (not pointing to) a perf_evsel. */
+struct pyrf_evsel {
+ PyObject_HEAD
+
+ struct perf_evsel evsel;
+};
+
+/*
+ * perf.evsel.__init__: build a perf_event_attr from the optional keyword
+ * arguments listed in kwlist and initialize the embedded evsel with it.
+ *
+ * Defaults: a hardware cpu-cycles counter sampling PERIOD | TID, with
+ * 'freq' and 'sample_id_all' enabled and every other flag cleared.
+ * 'sample_freq' and 'sample_period' are mutually exclusive; giving both
+ * raises ValueError (this path used to return -1 without any exception,
+ * which surfaced as a SystemError).
+ *
+ * Returns 0 on success, -1 with a Python exception set on failure.
+ */
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+			    PyObject *args, PyObject *kwargs)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+	};
+	static char *kwlist[] = {
+		"type",
+		"config",
+		"sample_freq",
+		"sample_period",
+		"sample_type",
+		"read_format",
+		"disabled",
+		"inherit",
+		"pinned",
+		"exclusive",
+		"exclude_user",
+		"exclude_kernel",
+		"exclude_hv",
+		"exclude_idle",
+		"mmap",
+		"comm",
+		"freq",
+		"inherit_stat",
+		"enable_on_exec",
+		"task",
+		"watermark",
+		"precise_ip",
+		"mmap_data",
+		"sample_id_all",
+		"wakeup_events",
+		"bp_type",
+		"bp_addr",
+		"bp_len", NULL, NULL, };
+	u64 sample_period = 0;
+	/* The attr flags are bitfields, so they are parsed into plain ints. */
+	u32 disabled = 0,
+	    inherit = 0,
+	    pinned = 0,
+	    exclusive = 0,
+	    exclude_user = 0,
+	    exclude_kernel = 0,
+	    exclude_hv = 0,
+	    exclude_idle = 0,
+	    mmap = 0,
+	    comm = 0,
+	    freq = 1,
+	    inherit_stat = 0,
+	    enable_on_exec = 0,
+	    task = 0,
+	    watermark = 0,
+	    precise_ip = 0,
+	    mmap_data = 0,
+	    sample_id_all = 1;
+	int idx = 0;
+
+	/*
+	 * NOTE(review): '&idx' has no matching format unit or keyword, so it
+	 * is never written and idx stays 0. Also, sample_freq and
+	 * read_format are parsed with the int-sized "i" unit - confirm that
+	 * matches the width of those attr fields.
+	 */
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+					 "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+					 &attr.type, &attr.config, &attr.sample_freq,
+					 &sample_period, &attr.sample_type,
+					 &attr.read_format, &disabled, &inherit,
+					 &pinned, &exclusive, &exclude_user,
+					 &exclude_kernel, &exclude_hv, &exclude_idle,
+					 &mmap, &comm, &freq, &inherit_stat,
+					 &enable_on_exec, &task, &watermark,
+					 &precise_ip, &mmap_data, &sample_id_all,
+					 &attr.wakeup_events, &attr.bp_type,
+					 &attr.bp_addr, &attr.bp_len, &idx))
+		return -1;
+
+	/* union... */
+	if (sample_period != 0) {
+		if (attr.sample_freq != 0) {
+			PyErr_SetString(PyExc_ValueError,
+					"perf: sample_freq and sample_period are mutually exclusive");
+			return -1;
+		}
+		attr.sample_period = sample_period;
+	}
+
+	/* Bitfields */
+	attr.disabled = disabled;
+	attr.inherit = inherit;
+	attr.pinned = pinned;
+	attr.exclusive = exclusive;
+	attr.exclude_user = exclude_user;
+	attr.exclude_kernel = exclude_kernel;
+	attr.exclude_hv = exclude_hv;
+	attr.exclude_idle = exclude_idle;
+	attr.mmap = mmap;
+	attr.comm = comm;
+	attr.freq = freq;
+	attr.inherit_stat = inherit_stat;
+	attr.enable_on_exec = enable_on_exec;
+	attr.task = task;
+	attr.watermark = watermark;
+	attr.precise_ip = precise_ip;
+	attr.mmap_data = mmap_data;
+	attr.sample_id_all = sample_id_all;
+
+	perf_evsel__init(&pevsel->evsel, &attr, idx);
+	return 0;
+}
+
+/* tp_dealloc: tear down the embedded evsel, then free the Python object. */
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+ perf_evsel__exit(&pevsel->evsel);
+ pevsel->ob_type->tp_free((PyObject*)pevsel);
+}
+
+/*
+ * perf.evsel.open(cpus=None, threads=None, group=0, overwrite=0).
+ *
+ * Opens the event selector on the given cpu and thread maps; a map that
+ * is not passed is handed to perf_evsel__open() as NULL. The arguments
+ * are used as perf.cpu_map / perf.thread_map objects without a type
+ * check. Returns None, or raises OSError built from errno on failure.
+ */
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+				  PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
+	PyObject *py_cpus = NULL, *py_threads = NULL;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	int group = 0, overwrite = 0;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+					 &py_cpus, &py_threads, &group, &overwrite))
+		return NULL;
+
+	threads = py_threads ? ((struct pyrf_thread_map *)py_threads)->threads : NULL;
+	cpus = py_cpus ? ((struct pyrf_cpu_map *)py_cpus)->cpus : NULL;
+
+	if (perf_evsel__open(&pevsel->evsel, cpus, threads, group, overwrite) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_RETURN_NONE;
+}
+
+/* Method table of perf.evsel, terminated by an entry with a NULL name. */
+static PyMethodDef pyrf_evsel__methods[] = {
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evsel__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
+ },
+ { .ml_name = NULL, }
+};
+
+/*
+ * Docstring of perf.evsel. It used to read "perf event selector list
+ * object.", the text of perf.evlist; an evsel is a single selector.
+ */
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector object.");
+
+/*
+ * Type object for perf.evsel; tp_new is filled in by
+ * pyrf_evsel__setup_types().
+ */
+static PyTypeObject pyrf_evsel__type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "perf.evsel",
+	.tp_basicsize = sizeof(struct pyrf_evsel),
+	.tp_dealloc = (destructor)pyrf_evsel__delete,
+	.tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+	.tp_doc = pyrf_evsel__doc,
+	.tp_methods = pyrf_evsel__methods,
+	.tp_init = (initproc)pyrf_evsel__init,
+};
+
+/*
+ * Install the generic allocator on perf.evsel and finalize the type.
+ * Returns the PyType_Ready() result.
+ */
+static int pyrf_evsel__setup_types(void)
+{
+ pyrf_evsel__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evsel__type);
+}
+
+/* Python object embedding (not pointing to) a perf_evlist. */
+struct pyrf_evlist {
+ PyObject_HEAD
+
+ struct perf_evlist evlist;
+};
+
+/*
+ * perf.evlist.__init__(cpus, threads): both positional arguments are
+ * required and are used as perf.cpu_map / perf.thread_map objects
+ * without a type check. Returns 0, or -1 when argument parsing fails.
+ */
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+			     PyObject *args, PyObject *kwargs __used)
+{
+	PyObject *py_cpus = NULL, *py_threads = NULL;
+
+	if (!PyArg_ParseTuple(args, "OO", &py_cpus, &py_threads))
+		return -1;
+
+	perf_evlist__init(&pevlist->evlist,
+			  ((struct pyrf_cpu_map *)py_cpus)->cpus,
+			  ((struct pyrf_thread_map *)py_threads)->threads);
+	return 0;
+}
+
+/* tp_dealloc: tear down the embedded evlist, then free the Python object. */
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+ perf_evlist__exit(&pevlist->evlist);
+ pevlist->ob_type->tp_free((PyObject*)pevlist);
+}
+
+/*
+ * perf.evlist.mmap(pages=128, overwrite=False): map the event buffers of
+ * the list. Returns None, or raises OSError built from errno on failure.
+ */
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = {"pages", "overwrite",
+				 NULL, NULL};
+	int overwrite = false;
+	int pages = 128;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+					 &pages, &overwrite))
+		return NULL;
+
+	if (perf_evlist__mmap(&pevlist->evlist, pages, overwrite) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	Py_RETURN_NONE;
+}
+
+/*
+ * perf.evlist.poll(timeout=-1): poll() the list's file descriptors.
+ * Returns the poll() result as a Python int, or raises OSError built
+ * from errno when poll() fails.
+ */
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+				   PyObject *args, PyObject *kwargs)
+{
+	static char *kwlist[] = {"timeout", NULL, NULL};
+	struct perf_evlist *evlist = &pevlist->evlist;
+	int timeout = -1;
+	int nr_ready;
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+		return NULL;
+
+	nr_ready = poll(evlist->pollfd, evlist->nr_fds, timeout);
+	if (nr_ready >= 0)
+		return Py_BuildValue("i", nr_ready);
+
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+}
+
+/*
+ * perf.evlist.get_pollfd(): return a new list holding one Python file
+ * object (opened "r", named "perf") per polled file descriptor.
+ *
+ * On any failure the partially built list is released and MemoryError
+ * is raised. The list used to be leaked on that path, and a failed
+ * PyList_New() was not noticed until the first append.
+ */
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+					 PyObject *args __used, PyObject *kwargs __used)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	PyObject *list = PyList_New(0);
+	int i;
+
+	if (list == NULL)
+		return NULL;
+
+	for (i = 0; i < evlist->nr_fds; ++i) {
+		PyObject *file;
+		FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
+
+		if (fp == NULL)
+			goto free_list;
+
+		/*
+		 * fp is deliberately not fclose()d on failure: that would
+		 * also close the descriptor still owned by the evlist.
+		 */
+		file = PyFile_FromFile(fp, "perf", "r", NULL);
+		if (file == NULL)
+			goto free_list;
+
+		if (PyList_Append(list, file) != 0) {
+			Py_DECREF(file);
+			goto free_list;
+		}
+
+		/* The list holds its own reference now. */
+		Py_DECREF(file);
+	}
+
+	return list;
+free_list:
+	Py_DECREF(list);
+	return PyErr_NoMemory();
+}
+
+
+/*
+ * perf.evlist.add(evsel): append an event selector to the list.
+ *
+ * The argument is used as a perf.evsel object without a type check. A
+ * reference to it is taken because the list links to the evsel embedded
+ * in that object. Returns the entry count after the addition.
+ */
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+				  PyObject *args, PyObject *kwargs __used)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	struct pyrf_evsel *wrapper;
+	PyObject *arg;
+
+	if (!PyArg_ParseTuple(args, "O", &arg))
+		return NULL;
+
+	Py_INCREF(arg);
+	wrapper = (struct pyrf_evsel *)arg;
+	/* The new entry's index is its position at the end of the list. */
+	wrapper->evsel.idx = evlist->nr_entries;
+	perf_evlist__add(evlist, &wrapper->evsel);
+
+	return Py_BuildValue("i", evlist->nr_entries);
+}
+
+/*
+ * perf.evlist.read_on_cpu(cpu, sample_id_all=1): read the next event
+ * for 'cpu'.
+ *
+ * Returns a new event object with its sample fields parsed using the
+ * sample_type of the first evsel in the list, or None when no event is
+ * available.
+ *
+ * The keyword list used to name only "sample_id_all", so that keyword
+ * was bound to the first format unit (cpu) and 'cpu' itself could not be
+ * passed by name. A failed event construction no longer replaces an
+ * already pending exception with MemoryError.
+ */
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+					  PyObject *args, PyObject *kwargs)
+{
+	struct perf_evlist *evlist = &pevlist->evlist;
+	union perf_event *event;
+	int sample_id_all = 1, cpu;
+	static char *kwlist[] = {"cpu", "sample_id_all", NULL};
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+					 &cpu, &sample_id_all))
+		return NULL;
+
+	event = perf_evlist__read_on_cpu(evlist, cpu);
+	if (event != NULL) {
+		struct perf_evsel *first;
+		PyObject *pyevent = pyrf_event__new(event);
+		struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+
+		if (pyevent == NULL)
+			return PyErr_Occurred() ? NULL : PyErr_NoMemory();
+
+		/*
+		 * NOTE(review): the parse result is ignored and the list is
+		 * assumed to be non-empty here - confirm with callers.
+		 */
+		first = list_entry(evlist->entries.next, struct perf_evsel, node);
+		perf_event__parse_sample(event, first->attr.sample_type, sample_id_all,
+					 &pevent->sample);
+		return pyevent;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+}
+
+/*
+ * Method table of perf.evlist, terminated by an entry with a NULL name.
+ * Every method is registered as accepting positional and keyword
+ * arguments.
+ */
+static PyMethodDef pyrf_evlist__methods[] = {
+ {
+ .ml_name = "mmap",
+ .ml_meth = (PyCFunction)pyrf_evlist__mmap,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("mmap the file descriptor table.")
+ },
+ {
+ .ml_name = "poll",
+ .ml_meth = (PyCFunction)pyrf_evlist__poll,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("poll the file descriptor table.")
+ },
+ {
+ .ml_name = "get_pollfd",
+ .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("get the poll file descriptor table.")
+ },
+ {
+ .ml_name = "add",
+ .ml_meth = (PyCFunction)pyrf_evlist__add,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("adds an event selector to the list.")
+ },
+ {
+ .ml_name = "read_on_cpu",
+ .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("reads an event.")
+ },
+ { .ml_name = NULL, }
+};
+
+/* sq_length: number of event selectors currently in the list. */
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+	return ((struct pyrf_evlist *)obj)->evlist.nr_entries;
+}
+
+/*
+ * sq_item: return the perf.evsel object stored at position 'i'.
+ *
+ * The list is walked from its head until the i-th entry; the Python
+ * object is recovered from the evsel embedded in it and returned as a
+ * new reference.
+ *
+ * Raises IndexError for an out-of-range index. The sequence iteration
+ * protocol relies on that exception to stop, and returning NULL without
+ * one makes the interpreter raise SystemError instead. Indexes that are
+ * still negative after the interpreter's adjustment are rejected too.
+ */
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+	struct pyrf_evlist *pevlist = (void *)obj;
+	struct perf_evsel *pos;
+
+	if (i < 0 || i >= pevlist->evlist.nr_entries) {
+		PyErr_SetString(PyExc_IndexError, "evlist index out of range");
+		return NULL;
+	}
+
+	list_for_each_entry(pos, &pevlist->evlist.entries, node)
+		if (i-- == 0)
+			break;
+
+	return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+/* Sequence protocol of perf.evlist: len() and indexing only. */
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+ .sq_length = pyrf_evlist__length,
+ .sq_item = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+/*
+ * Type object for perf.evlist; tp_new is filled in by
+ * pyrf_evlist__setup_types().
+ */
+static PyTypeObject pyrf_evlist__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evlist",
+ .tp_basicsize = sizeof(struct pyrf_evlist),
+ .tp_dealloc = (destructor)pyrf_evlist__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_as_sequence = &pyrf_evlist__sequence_methods,
+ .tp_doc = pyrf_evlist__doc,
+ .tp_methods = pyrf_evlist__methods,
+ .tp_init = (initproc)pyrf_evlist__init,
+};
+
+/*
+ * Install the generic allocator on perf.evlist and finalize the type.
+ * Returns the PyType_Ready() result.
+ */
+static int pyrf_evlist__setup_types(void)
+{
+ pyrf_evlist__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evlist__type);
+}
+
+/*
+ * Integer constants exported into the module dictionary by initperf().
+ * Each Python name is the C macro name without its PERF_ prefix; the
+ * table ends with an entry whose name is NULL.
+ */
+static struct {
+ const char *name;
+ int value;
+} perf__constants[] = {
+ { "TYPE_HARDWARE", PERF_TYPE_HARDWARE },
+ { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE },
+ { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT },
+ { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE },
+ { "TYPE_RAW", PERF_TYPE_RAW },
+ { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT },
+
+ { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES },
+ { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS },
+ { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES },
+ { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES },
+ { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
+ { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
+ { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
+ { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
+ { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
+ { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB },
+ { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB },
+ { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU },
+ { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ },
+ { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE },
+ { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH },
+ { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+ { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
+
+ { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
+ { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
+ { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
+ { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS },
+ { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN },
+ { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ },
+ { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
+ { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
+
+ { "SAMPLE_IP", PERF_SAMPLE_IP },
+ { "SAMPLE_TID", PERF_SAMPLE_TID },
+ { "SAMPLE_TIME", PERF_SAMPLE_TIME },
+ { "SAMPLE_ADDR", PERF_SAMPLE_ADDR },
+ { "SAMPLE_READ", PERF_SAMPLE_READ },
+ { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN },
+ { "SAMPLE_ID", PERF_SAMPLE_ID },
+ { "SAMPLE_CPU", PERF_SAMPLE_CPU },
+ { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD },
+ { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID },
+ { "SAMPLE_RAW", PERF_SAMPLE_RAW },
+
+ { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED },
+ { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING },
+ { "FORMAT_ID", PERF_FORMAT_ID },
+ { "FORMAT_GROUP", PERF_FORMAT_GROUP },
+
+ { "RECORD_MMAP", PERF_RECORD_MMAP },
+ { "RECORD_LOST", PERF_RECORD_LOST },
+ { "RECORD_COMM", PERF_RECORD_COMM },
+ { "RECORD_EXIT", PERF_RECORD_EXIT },
+ { "RECORD_THROTTLE", PERF_RECORD_THROTTLE },
+ { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE },
+ { "RECORD_FORK", PERF_RECORD_FORK },
+ { "RECORD_READ", PERF_RECORD_READ },
+ { "RECORD_SAMPLE", PERF_RECORD_SAMPLE },
+ { .name = NULL, },
+};
+
+/* Module level function table: empty, the module only exports types and constants. */
+static PyMethodDef perf__methods[] = {
+ { .ml_name = NULL, }
+};
+
+/*
+ * Module entry point for "import perf".
+ *
+ * Creates the module, finalizes every type (event, evlist, evsel,
+ * thread_map, cpu_map - stopping silently at the first failure),
+ * publishes the evlist, evsel, thread_map and cpu_map types, and copies
+ * perf__constants into the module dictionary. Any exception pending at
+ * the end is replaced by ImportError.
+ */
+PyMODINIT_FUNC initperf(void)
+{
+	PyObject *module = Py_InitModule("perf", perf__methods);
+	PyObject *dict, *value;
+	int n;
+
+	if (module == NULL)
+		return;
+	if (pyrf_event__setup_types() < 0)
+		return;
+	if (pyrf_evlist__setup_types() < 0)
+		return;
+	if (pyrf_evsel__setup_types() < 0)
+		return;
+	if (pyrf_thread_map__setup_types() < 0)
+		return;
+	if (pyrf_cpu_map__setup_types() < 0)
+		return;
+
+	/* PyModule_AddObject() steals a reference, hence the INCREFs. */
+	Py_INCREF(&pyrf_evlist__type);
+	PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+	Py_INCREF(&pyrf_evsel__type);
+	PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+	Py_INCREF(&pyrf_thread_map__type);
+	PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+	Py_INCREF(&pyrf_cpu_map__type);
+	PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+	dict = PyModule_GetDict(module);
+	if (dict != NULL) {
+		n = 0;
+		while (perf__constants[n].name != NULL) {
+			value = PyInt_FromLong(perf__constants[n].value);
+			if (value == NULL)
+				break;
+			/* The dictionary keeps its own reference. */
+			PyDict_SetItemString(dict, perf__constants[n].name, value);
+			Py_DECREF(value);
+			n++;
+		}
+	}
+
+	if (PyErr_Occurred())
+		PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c6d99334bdfa..2040b8538527 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -248,8 +248,7 @@ static void python_process_event(int cpu, void *data,
context = PyCObject_FromVoidPtr(scripting_context, NULL);
PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
- PyTuple_SetItem(t, n++,
- PyCObject_FromVoidPtr(scripting_context, NULL));
+ PyTuple_SetItem(t, n++, context);
if (handler) {
PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 105f00bfd555..f26639fa0fb3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -7,6 +7,8 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include "evlist.h"
+#include "evsel.h"
#include "session.h"
#include "sort.h"
#include "util.h"
@@ -19,7 +21,7 @@ static int perf_session__open(struct perf_session *self, bool force)
self->fd_pipe = true;
self->fd = STDIN_FILENO;
- if (perf_header__read(self, self->fd) < 0)
+ if (perf_session__read_header(self, self->fd) < 0)
pr_err("incompatible file format");
return 0;
@@ -51,7 +53,7 @@ static int perf_session__open(struct perf_session *self, bool force)
goto out_close;
}
- if (perf_header__read(self, self->fd) < 0) {
+ if (perf_session__read_header(self, self->fd) < 0) {
pr_err("incompatible file format");
goto out_close;
}
@@ -67,7 +69,7 @@ out_close:
static void perf_session__id_header_size(struct perf_session *session)
{
- struct sample_data *data;
+ struct perf_sample *data;
u64 sample_type = session->sample_type;
u16 size = 0;
@@ -92,21 +94,10 @@ out:
session->id_hdr_size = size;
}
-void perf_session__set_sample_id_all(struct perf_session *session, bool value)
-{
- session->sample_id_all = value;
- perf_session__id_header_size(session);
-}
-
-void perf_session__set_sample_type(struct perf_session *session, u64 type)
-{
- session->sample_type = type;
-}
-
void perf_session__update_sample_type(struct perf_session *self)
{
- self->sample_type = perf_header__sample_type(&self->header);
- self->sample_id_all = perf_header__sample_id_all(&self->header);
+ self->sample_type = perf_evlist__sample_type(self->evlist);
+ self->sample_id_all = perf_evlist__sample_id_all(self->evlist);
perf_session__id_header_size(self);
}
@@ -135,13 +126,9 @@ struct perf_session *perf_session__new(const char *filename, int mode,
if (self == NULL)
goto out;
- if (perf_header__init(&self->header) < 0)
- goto out_free;
-
memcpy(self->filename, filename, len);
self->threads = RB_ROOT;
INIT_LIST_HEAD(&self->dead_threads);
- self->hists_tree = RB_ROOT;
self->last_match = NULL;
/*
* On 64bit we can mmap the data file in one go. No need for tiny mmap
@@ -162,17 +149,16 @@ struct perf_session *perf_session__new(const char *filename, int mode,
if (mode == O_RDONLY) {
if (perf_session__open(self, force) < 0)
goto out_delete;
+ perf_session__update_sample_type(self);
} else if (mode == O_WRONLY) {
/*
* In O_RDONLY mode this will be performed when reading the
- * kernel MMAP event, in event__process_mmap().
+ * kernel MMAP event, in perf_event__process_mmap().
*/
if (perf_session__create_kernel_maps(self) < 0)
goto out_delete;
}
- perf_session__update_sample_type(self);
-
if (ops && ops->ordering_requires_timestamps &&
ops->ordered_samples && !self->sample_id_all) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
@@ -181,9 +167,6 @@ struct perf_session *perf_session__new(const char *filename, int mode,
out:
return self;
-out_free:
- free(self);
- return NULL;
out_delete:
perf_session__delete(self);
return NULL;
@@ -214,7 +197,6 @@ static void perf_session__delete_threads(struct perf_session *self)
void perf_session__delete(struct perf_session *self)
{
- perf_header__exit(&self->header);
perf_session__destroy_kernel_maps(self);
perf_session__delete_dead_threads(self);
perf_session__delete_threads(self);
@@ -242,17 +224,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0;
}
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent)
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent)
{
u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
- struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));
+ int err;
- if (!syms)
- return NULL;
+ callchain_cursor_reset(&self->callchain_cursor);
for (i = 0; i < chain->nr; i++) {
u64 ip = chain->ips[i];
@@ -281,30 +262,33 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
*parent = al.sym;
if (!symbol_conf.use_callchain)
break;
- syms[i].map = al.map;
- syms[i].sym = al.sym;
}
+
+ err = callchain_cursor_append(&self->callchain_cursor,
+ ip, al.map, al.sym);
+ if (err)
+ return err;
}
- return syms;
+ return 0;
}
-static int process_event_synth_stub(event_t *event __used,
+static int process_event_synth_stub(union perf_event *event __used,
struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_event_stub(event_t *event __used,
- struct sample_data *sample __used,
+static int process_event_stub(union perf_event *event __used,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_finished_round_stub(event_t *event __used,
+static int process_finished_round_stub(union perf_event *event __used,
struct perf_session *session __used,
struct perf_event_ops *ops __used)
{
@@ -312,7 +296,7 @@ static int process_finished_round_stub(event_t *event __used,
return 0;
}
-static int process_finished_round(event_t *event,
+static int process_finished_round(union perf_event *event,
struct perf_session *session,
struct perf_event_ops *ops);
@@ -329,7 +313,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
if (handler->exit == NULL)
handler->exit = process_event_stub;
if (handler->lost == NULL)
- handler->lost = event__process_lost;
+ handler->lost = perf_event__process_lost;
if (handler->read == NULL)
handler->read = process_event_stub;
if (handler->throttle == NULL)
@@ -363,98 +347,98 @@ void mem_bswap_64(void *src, int byte_size)
}
}
-static void event__all64_swap(event_t *self)
+static void perf_event__all64_swap(union perf_event *event)
{
- struct perf_event_header *hdr = &self->header;
- mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
+ struct perf_event_header *hdr = &event->header;
+ mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}
-static void event__comm_swap(event_t *self)
+static void perf_event__comm_swap(union perf_event *event)
{
- self->comm.pid = bswap_32(self->comm.pid);
- self->comm.tid = bswap_32(self->comm.tid);
+ event->comm.pid = bswap_32(event->comm.pid);
+ event->comm.tid = bswap_32(event->comm.tid);
}
-static void event__mmap_swap(event_t *self)
+static void perf_event__mmap_swap(union perf_event *event)
{
- self->mmap.pid = bswap_32(self->mmap.pid);
- self->mmap.tid = bswap_32(self->mmap.tid);
- self->mmap.start = bswap_64(self->mmap.start);
- self->mmap.len = bswap_64(self->mmap.len);
- self->mmap.pgoff = bswap_64(self->mmap.pgoff);
+ event->mmap.pid = bswap_32(event->mmap.pid);
+ event->mmap.tid = bswap_32(event->mmap.tid);
+ event->mmap.start = bswap_64(event->mmap.start);
+ event->mmap.len = bswap_64(event->mmap.len);
+ event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}
-static void event__task_swap(event_t *self)
+static void perf_event__task_swap(union perf_event *event)
{
- self->fork.pid = bswap_32(self->fork.pid);
- self->fork.tid = bswap_32(self->fork.tid);
- self->fork.ppid = bswap_32(self->fork.ppid);
- self->fork.ptid = bswap_32(self->fork.ptid);
- self->fork.time = bswap_64(self->fork.time);
+ event->fork.pid = bswap_32(event->fork.pid);
+ event->fork.tid = bswap_32(event->fork.tid);
+ event->fork.ppid = bswap_32(event->fork.ppid);
+ event->fork.ptid = bswap_32(event->fork.ptid);
+ event->fork.time = bswap_64(event->fork.time);
}
-static void event__read_swap(event_t *self)
+static void perf_event__read_swap(union perf_event *event)
{
- self->read.pid = bswap_32(self->read.pid);
- self->read.tid = bswap_32(self->read.tid);
- self->read.value = bswap_64(self->read.value);
- self->read.time_enabled = bswap_64(self->read.time_enabled);
- self->read.time_running = bswap_64(self->read.time_running);
- self->read.id = bswap_64(self->read.id);
+ event->read.pid = bswap_32(event->read.pid);
+ event->read.tid = bswap_32(event->read.tid);
+ event->read.value = bswap_64(event->read.value);
+ event->read.time_enabled = bswap_64(event->read.time_enabled);
+ event->read.time_running = bswap_64(event->read.time_running);
+ event->read.id = bswap_64(event->read.id);
}
-static void event__attr_swap(event_t *self)
+static void perf_event__attr_swap(union perf_event *event)
{
size_t size;
- self->attr.attr.type = bswap_32(self->attr.attr.type);
- self->attr.attr.size = bswap_32(self->attr.attr.size);
- self->attr.attr.config = bswap_64(self->attr.attr.config);
- self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period);
- self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type);
- self->attr.attr.read_format = bswap_64(self->attr.attr.read_format);
- self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events);
- self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type);
- self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr);
- self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len);
-
- size = self->header.size;
- size -= (void *)&self->attr.id - (void *)self;
- mem_bswap_64(self->attr.id, size);
+ event->attr.attr.type = bswap_32(event->attr.attr.type);
+ event->attr.attr.size = bswap_32(event->attr.attr.size);
+ event->attr.attr.config = bswap_64(event->attr.attr.config);
+ event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period);
+ event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type);
+ event->attr.attr.read_format = bswap_64(event->attr.attr.read_format);
+ event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events);
+ event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type);
+ event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr);
+ event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len);
+
+ size = event->header.size;
+ size -= (void *)&event->attr.id - (void *)event;
+ mem_bswap_64(event->attr.id, size);
}
-static void event__event_type_swap(event_t *self)
+static void perf_event__event_type_swap(union perf_event *event)
{
- self->event_type.event_type.event_id =
- bswap_64(self->event_type.event_type.event_id);
+ event->event_type.event_type.event_id =
+ bswap_64(event->event_type.event_type.event_id);
}
-static void event__tracing_data_swap(event_t *self)
+static void perf_event__tracing_data_swap(union perf_event *event)
{
- self->tracing_data.size = bswap_32(self->tracing_data.size);
+ event->tracing_data.size = bswap_32(event->tracing_data.size);
}
-typedef void (*event__swap_op)(event_t *self);
-
-static event__swap_op event__swap_ops[] = {
- [PERF_RECORD_MMAP] = event__mmap_swap,
- [PERF_RECORD_COMM] = event__comm_swap,
- [PERF_RECORD_FORK] = event__task_swap,
- [PERF_RECORD_EXIT] = event__task_swap,
- [PERF_RECORD_LOST] = event__all64_swap,
- [PERF_RECORD_READ] = event__read_swap,
- [PERF_RECORD_SAMPLE] = event__all64_swap,
- [PERF_RECORD_HEADER_ATTR] = event__attr_swap,
- [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap,
- [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
- [PERF_RECORD_HEADER_BUILD_ID] = NULL,
- [PERF_RECORD_HEADER_MAX] = NULL,
+typedef void (*perf_event__swap_op)(union perf_event *event);
+
+static perf_event__swap_op perf_event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = perf_event__mmap_swap,
+ [PERF_RECORD_COMM] = perf_event__comm_swap,
+ [PERF_RECORD_FORK] = perf_event__task_swap,
+ [PERF_RECORD_EXIT] = perf_event__task_swap,
+ [PERF_RECORD_LOST] = perf_event__all64_swap,
+ [PERF_RECORD_READ] = perf_event__read_swap,
+ [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
+ [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap,
+ [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
+ [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+ [PERF_RECORD_HEADER_BUILD_ID] = NULL,
+ [PERF_RECORD_HEADER_MAX] = NULL,
};
struct sample_queue {
u64 timestamp;
u64 file_offset;
- event_t *event;
+ union perf_event *event;
struct list_head list;
};
@@ -472,8 +456,8 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
}
static int perf_session_deliver_event(struct perf_session *session,
- event_t *event,
- struct sample_data *sample,
+ union perf_event *event,
+ struct perf_sample *sample,
struct perf_event_ops *ops,
u64 file_offset);
@@ -483,7 +467,7 @@ static void flush_sample_queue(struct perf_session *s,
struct ordered_samples *os = &s->ordered_samples;
struct list_head *head = &os->samples;
struct sample_queue *tmp, *iter;
- struct sample_data sample;
+ struct perf_sample sample;
u64 limit = os->next_flush;
u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
@@ -494,7 +478,7 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- event__parse_sample(iter->event, s, &sample);
+ perf_session__parse_sample(s, iter->event, &sample);
perf_session_deliver_event(s, iter->event, &sample, ops,
iter->file_offset);
@@ -550,7 +534,7 @@ static void flush_sample_queue(struct perf_session *s,
* Flush every events below timestamp 7
* etc...
*/
-static int process_finished_round(event_t *event __used,
+static int process_finished_round(union perf_event *event __used,
struct perf_session *session,
struct perf_event_ops *ops)
{
@@ -607,12 +591,12 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
-static int perf_session_queue_event(struct perf_session *s, event_t *event,
- struct sample_data *data, u64 file_offset)
+static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+ struct perf_sample *sample, u64 file_offset)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
- u64 timestamp = data->time;
+ u64 timestamp = sample->time;
struct sample_queue *new;
if (!timestamp || timestamp == ~0ULL)
@@ -648,7 +632,7 @@ static int perf_session_queue_event(struct perf_session *s, event_t *event,
return 0;
}
-static void callchain__printf(struct sample_data *sample)
+static void callchain__printf(struct perf_sample *sample)
{
unsigned int i;
@@ -660,8 +644,8 @@ static void callchain__printf(struct sample_data *sample)
}
static void perf_session__print_tstamp(struct perf_session *session,
- event_t *event,
- struct sample_data *sample)
+ union perf_event *event,
+ struct perf_sample *sample)
{
if (event->header.type != PERF_RECORD_SAMPLE &&
!session->sample_id_all) {
@@ -676,8 +660,8 @@ static void perf_session__print_tstamp(struct perf_session *session,
printf("%" PRIu64 " ", sample->time);
}
-static void dump_event(struct perf_session *session, event_t *event,
- u64 file_offset, struct sample_data *sample)
+static void dump_event(struct perf_session *session, union perf_event *event,
+ u64 file_offset, struct perf_sample *sample)
{
if (!dump_trace)
return;
@@ -691,11 +675,11 @@ static void dump_event(struct perf_session *session, event_t *event,
perf_session__print_tstamp(session, event, sample);
printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
- event->header.size, event__get_event_name(event->header.type));
+ event->header.size, perf_event__name(event->header.type));
}
-static void dump_sample(struct perf_session *session, event_t *event,
- struct sample_data *sample)
+static void dump_sample(struct perf_session *session, union perf_event *event,
+ struct perf_sample *sample)
{
if (!dump_trace)
return;
@@ -709,8 +693,8 @@ static void dump_sample(struct perf_session *session, event_t *event,
}
static int perf_session_deliver_event(struct perf_session *session,
- event_t *event,
- struct sample_data *sample,
+ union perf_event *event,
+ struct perf_sample *sample,
struct perf_event_ops *ops,
u64 file_offset)
{
@@ -743,7 +727,7 @@ static int perf_session_deliver_event(struct perf_session *session,
}
static int perf_session__preprocess_sample(struct perf_session *session,
- event_t *event, struct sample_data *sample)
+ union perf_event *event, struct perf_sample *sample)
{
if (event->header.type != PERF_RECORD_SAMPLE ||
!(session->sample_type & PERF_SAMPLE_CALLCHAIN))
@@ -758,7 +742,7 @@ static int perf_session__preprocess_sample(struct perf_session *session,
return 0;
}
-static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
struct perf_event_ops *ops, u64 file_offset)
{
dump_event(session, event, file_offset, NULL);
@@ -783,15 +767,16 @@ static int perf_session__process_user_event(struct perf_session *session, event_
}
static int perf_session__process_event(struct perf_session *session,
- event_t *event,
+ union perf_event *event,
struct perf_event_ops *ops,
u64 file_offset)
{
- struct sample_data sample;
+ struct perf_sample sample;
int ret;
- if (session->header.needs_swap && event__swap_ops[event->header.type])
- event__swap_ops[event->header.type](event);
+ if (session->header.needs_swap &&
+ perf_event__swap_ops[event->header.type])
+ perf_event__swap_ops[event->header.type](event);
if (event->header.type >= PERF_RECORD_HEADER_MAX)
return -EINVAL;
@@ -804,7 +789,7 @@ static int perf_session__process_event(struct perf_session *session,
/*
* For all kernel events we get the sample data
*/
- event__parse_sample(event, session, &sample);
+ perf_session__parse_sample(session, event, &sample);
/* Preprocess sample records - precheck callchains */
if (perf_session__preprocess_sample(session, event, &sample))
@@ -843,7 +828,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
static void perf_session__warn_about_errors(const struct perf_session *session,
const struct perf_event_ops *ops)
{
- if (ops->lost == event__process_lost &&
+ if (ops->lost == perf_event__process_lost &&
session->hists.stats.total_lost != 0) {
ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
"!\n\nCheck IO/CPU overload!\n\n",
@@ -875,7 +860,7 @@ volatile int session_done;
static int __perf_session__process_pipe_events(struct perf_session *self,
struct perf_event_ops *ops)
{
- event_t event;
+ union perf_event event;
uint32_t size;
int skip = 0;
u64 head;
@@ -956,7 +941,7 @@ int __perf_session__process_events(struct perf_session *session,
struct ui_progress *progress;
size_t page_size, mmap_size;
char *buf, *mmaps[8];
- event_t *event;
+ union perf_event *event;
uint32_t size;
perf_event_ops__fill_defaults(ops);
@@ -1001,7 +986,7 @@ remap:
file_pos = file_offset + head;
more:
- event = (event_t *)(buf + head);
+ event = (union perf_event *)(buf + head);
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
@@ -1134,3 +1119,18 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);
return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
+{
+ struct perf_evsel *pos;
+ size_t ret = fprintf(fp, "Aggregated stats:\n");
+
+ ret += hists__fprintf_nr_events(&session->hists, fp);
+
+ list_for_each_entry(pos, &session->evlist->entries, node) {
+ ret += fprintf(fp, "%s stats:\n", event_name(pos));
+ ret += hists__fprintf_nr_events(&pos->hists, fp);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index decd83f274fd..b5b148b0aaca 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -34,12 +34,12 @@ struct perf_session {
struct thread *last_match;
struct machine host_machine;
struct rb_root machines;
- struct rb_root hists_tree;
+ struct perf_evlist *evlist;
/*
- * FIXME: should point to the first entry in hists_tree and
- * be a hists instance. Right now its only 'report'
- * that is using ->hists_tree while all the rest use
- * ->hists.
+ * FIXME: Need to split this up further, we need global
+ * stats + per event stats. 'perf diff' also needs
+ * to properly support multiple events in a single
+ * perf.data file.
*/
struct hists hists;
u64 sample_type;
@@ -51,15 +51,17 @@ struct perf_session {
int cwdlen;
char *cwd;
struct ordered_samples ordered_samples;
- char filename[0];
+ struct callchain_cursor callchain_cursor;
+ char filename[0];
};
struct perf_event_ops;
-typedef int (*event_op)(event_t *self, struct sample_data *sample,
+typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
struct perf_session *session);
-typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
-typedef int (*event_op2)(event_t *self, struct perf_session *session,
+typedef int (*event_synth_op)(union perf_event *self,
+ struct perf_session *session);
+typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
struct perf_event_ops *ops);
struct perf_event_ops {
@@ -94,10 +96,10 @@ int __perf_session__process_events(struct perf_session *self,
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent);
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
@@ -110,8 +112,6 @@ void mem_bswap_64(void *src, int byte_size);
int perf_session__create_kernel_maps(struct perf_session *self);
void perf_session__update_sample_type(struct perf_session *self);
-void perf_session__set_sample_id_all(struct perf_session *session, bool value);
-void perf_session__set_sample_type(struct perf_session *session, u64 type);
void perf_session__remove_thread(struct perf_session *self, struct thread *th);
static inline
@@ -149,9 +149,14 @@ size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp);
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self,
FILE *fp, bool with_hits);
-static inline
-size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp)
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
+
+static inline int perf_session__parse_sample(struct perf_session *session,
+ const union perf_event *event,
+ struct perf_sample *sample)
{
- return hists__fprintf_nr_events(&self->hists, fp);
+ return perf_event__parse_sample(event, session->sample_type,
+ session->sample_id_all, sample);
}
+
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000000..e24ffadb20b2
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python2
+
+from distutils.core import setup, Extension
+
+perf = Extension('perf',
+ sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
+ 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
+ 'util/util.c', 'util/xyarray.c', 'util/cgroup.c'],
+ include_dirs = ['util/include'],
+ extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings'])
+
+setup(name='perf',
+ version='0.1',
+ description='Interface with the Linux profiling infrastructure',
+ author='Arnaldo Carvalho de Melo',
+ author_email='acme@redhat.com',
+ license='GPLv2',
+ url='http://perf.wiki.kernel.org',
+ ext_modules=[perf])
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000000..834c8ebfe38e
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,199 @@
+#include "util.h"
+#include "string.h"
+#include "strfilter.h"
+
+/* Operators */
+static const char *OP_and = "&"; /* Logical AND */
+static const char *OP_or = "|"; /* Logical OR */
+static const char *OP_not = "!"; /* Logical NOT */
+
+#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
+
+static void strfilter_node__delete(struct strfilter_node *self)
+{
+ if (self) {
+ if (self->p && !is_operator(*self->p))
+ free((char *)self->p);
+ strfilter_node__delete(self->l);
+ strfilter_node__delete(self->r);
+ free(self);
+ }
+}
+
+void strfilter__delete(struct strfilter *self)
+{
+ if (self) {
+ strfilter_node__delete(self->root);
+ free(self);
+ }
+}
+
+static const char *get_token(const char *s, const char **e)
+{
+ const char *p;
+
+ while (isspace(*s)) /* Skip spaces */
+ s++;
+
+ if (*s == '\0') {
+ p = s;
+ goto end;
+ }
+
+ p = s + 1;
+ if (!is_separator(*s)) {
+ /* End search */
+retry:
+ while (*p && !is_separator(*p) && !isspace(*p))
+ p++;
+ /* Escape and special case: '!' is also used in glob pattern */
+ if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
+ p++;
+ goto retry;
+ }
+ }
+end:
+ *e = p;
+ return s;
+}
+
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+ struct strfilter_node *l,
+ struct strfilter_node *r)
+{
+ struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node));
+
+ if (ret) {
+ ret->p = op;
+ ret->l = l;
+ ret->r = r;
+ }
+
+ return ret;
+}
+
+static struct strfilter_node *strfilter_node__new(const char *s,
+ const char **ep)
+{
+ struct strfilter_node root, *cur, *last_op;
+ const char *e;
+
+ if (!s)
+ return NULL;
+
+ memset(&root, 0, sizeof(root));
+ last_op = cur = &root;
+
+ s = get_token(s, &e);
+ while (*s != '\0' && *s != ')') {
+ switch (*s) {
+ case '&': /* Exchg last OP->r with AND */
+ if (!cur->r || !last_op->r)
+ goto error;
+ cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+ if (!cur)
+ goto nomem;
+ last_op->r = cur;
+ last_op = cur;
+ break;
+ case '|': /* Exchg the root with OR */
+ if (!cur->r || !root.r)
+ goto error;
+ cur = strfilter_node__alloc(OP_or, root.r, NULL);
+ if (!cur)
+ goto nomem;
+ root.r = cur;
+ last_op = cur;
+ break;
+ case '!': /* Add NOT as a leaf node */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur = cur->r;
+ break;
+ case '(': /* Recursively parses inside the parenthesis */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__new(s + 1, &s);
+ if (!s)
+ goto nomem;
+ if (!cur->r || *s != ')')
+ goto error;
+ e = s + 1;
+ break;
+ default:
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur->r->p = strndup(s, e - s);
+ if (!cur->r->p)
+ goto nomem;
+ }
+ s = get_token(e, &e);
+ }
+ if (!cur->r)
+ goto error;
+ *ep = s;
+ return root.r;
+nomem:
+ s = NULL;
+error:
+ *ep = s;
+ strfilter_node__delete(root.r);
+ return NULL;
+}
+
+/*
+ * Parse the filter rules and return a new strfilter.
+ * Return NULL on failure; *err (if given) is NULL if memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+ struct strfilter *ret = zalloc(sizeof(struct strfilter));
+ const char *ep = NULL;
+
+ if (ret)
+ ret->root = strfilter_node__new(rules, &ep);
+
+ if (!ret || !ret->root || *ep != '\0') {
+ if (err)
+ *err = ep;
+ strfilter__delete(ret);
+ ret = NULL;
+ }
+
+ return ret;
+}
+
+static bool strfilter_node__compare(struct strfilter_node *self,
+ const char *str)
+{
+ if (!self || !self->p)
+ return false;
+
+ switch (*self->p) {
+ case '|': /* OR */
+ return strfilter_node__compare(self->l, str) ||
+ strfilter_node__compare(self->r, str);
+ case '&': /* AND */
+ return strfilter_node__compare(self->l, str) &&
+ strfilter_node__compare(self->r, str);
+ case '!': /* NOT */
+ return !strfilter_node__compare(self->r, str);
+ default:
+ return strglobmatch(str, self->p);
+ }
+}
+
+/* Return true if STR matches the filter rules */
+bool strfilter__compare(struct strfilter *self, const char *str)
+{
+ if (!self)
+ return false;
+ return strfilter_node__compare(self->root, str);
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000000..00f58a7506de
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,48 @@
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of string filter */
+struct strfilter_node {
+ struct strfilter_node *l; /* Tree left branch (for &,|) */
+ struct strfilter_node *r; /* Tree right branch (for !,&,|) */
+ const char *p; /* Operator or rule */
+};
+
+/* String filter */
+struct strfilter {
+ struct strfilter_node *root;
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Set to the position in @rules where an error was detected
+ *
+ * Parse @rules and return a new strfilter. Return NULL if an error is detected.
+ * In that case, *@err indicates where it was detected, and *@err is NULL
+ * if a memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @self: String filter
+ * @str: target string
+ *
+ * Compare @str and @self. Return true if @str matches the rules.
+ */
+bool strfilter__compare(struct strfilter *self, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @self: String filter to delete
+ *
+ * Delete @self.
+ */
+void strfilter__delete(struct strfilter *self);
+
+#endif
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index fb737fe9be91..96c866045d60 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -456,9 +456,9 @@ void svg_legenda(void)
return;
svg_legenda_box(0, "Running", "sample");
- svg_legenda_box(100, "Idle","rect.c1");
- svg_legenda_box(200, "Deeper Idle", "rect.c3");
- svg_legenda_box(350, "Deepest Idle", "rect.c6");
+ svg_legenda_box(100, "Idle","c1");
+ svg_legenda_box(200, "Deeper Idle", "c3");
+ svg_legenda_box(350, "Deepest Idle", "c6");
svg_legenda_box(550, "Sleeping", "process2");
svg_legenda_box(650, "Waiting for cpu", "waiting");
svg_legenda_box(800, "Blocked on IO", "blocked");
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7821d0e6866f..00014e32c288 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -207,7 +207,6 @@ struct dso *dso__new(const char *name)
dso__set_short_name(self, self->name);
for (i = 0; i < MAP__NR_TYPES; ++i)
self->symbols[i] = self->symbol_names[i] = RB_ROOT;
- self->slen_calculated = 0;
self->origin = DSO__ORIG_NOT_FOUND;
self->loaded = 0;
self->sorted_by_name = 0;
@@ -1525,8 +1524,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
symbol_conf.symfs, self->long_name);
break;
case DSO__ORIG_GUEST_KMODULE:
- if (map->groups && map->groups->machine)
- root_dir = map->groups->machine->root_dir;
+ if (map->groups && machine)
+ root_dir = machine->root_dir;
else
root_dir = "";
snprintf(name, size, "%s%s%s", symbol_conf.symfs,
@@ -1836,7 +1835,7 @@ int dso__load_vmlinux(struct dso *self, struct map *map,
int err = -1, fd;
char symfs_vmlinux[PATH_MAX];
- snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s/%s",
+ snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s",
symbol_conf.symfs, vmlinux);
fd = open(symfs_vmlinux, O_RDONLY);
if (fd < 0)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 670cd1c88f54..4d7ed09fe332 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,7 +132,6 @@ struct dso {
struct rb_root symbol_names[MAP__NR_TYPES];
enum dso_kernel_type kernel;
u8 adjust_symbols:1;
- u8 slen_calculated:1;
u8 has_build_id:1;
u8 hit:1;
u8 annotate_warned:1;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 00f4eade2e3e..d5d3b22250f3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,61 +7,6 @@
#include "util.h"
#include "debug.h"
-/* Skip "." and ".." directories */
-static int filter(const struct dirent *dir)
-{
- if (dir->d_name[0] == '.')
- return 0;
- else
- return 1;
-}
-
-struct thread_map *thread_map__new_by_pid(pid_t pid)
-{
- struct thread_map *threads;
- char name[256];
- int items;
- struct dirent **namelist = NULL;
- int i;
-
- sprintf(name, "/proc/%d/task", pid);
- items = scandir(name, &namelist, filter, NULL);
- if (items <= 0)
- return NULL;
-
- threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
- if (threads != NULL) {
- for (i = 0; i < items; i++)
- threads->map[i] = atoi(namelist[i]->d_name);
- threads->nr = items;
- }
-
- for (i=0; i<items; i++)
- free(namelist[i]);
- free(namelist);
-
- return threads;
-}
-
-struct thread_map *thread_map__new_by_tid(pid_t tid)
-{
- struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
-
- if (threads != NULL) {
- threads->map[0] = tid;
- threads->nr = 1;
- }
-
- return threads;
-}
-
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
-{
- if (pid != -1)
- return thread_map__new_by_pid(pid);
- return thread_map__new_by_tid(tid);
-}
-
static struct thread *thread__new(pid_t pid)
{
struct thread *self = zalloc(sizeof(*self));
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index d7574101054a..e5f2401c1b5e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,24 +18,10 @@ struct thread {
int comm_len;
};
-struct thread_map {
- int nr;
- int map[];
-};
-
struct perf_session;
void thread__delete(struct thread *self);
-struct thread_map *thread_map__new_by_pid(pid_t pid);
-struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
-
-static inline void thread_map__delete(struct thread_map *threads)
-{
- free(threads);
-}
-
int thread__set_comm(struct thread *self, const char *comm);
int thread__comm_len(struct thread *self);
struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000000..a5df131b77c3
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,64 @@
+#include <dirent.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "thread_map.h"
+
+/* Skip "." and ".." directories */
+static int filter(const struct dirent *dir)
+{
+ if (dir->d_name[0] == '.')
+ return 0;
+ else
+ return 1;
+}
+
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+ struct thread_map *threads;
+ char name[256];
+ int items;
+ struct dirent **namelist = NULL;
+ int i;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ return NULL;
+
+ threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+ if (threads != NULL) {
+ for (i = 0; i < items; i++)
+ threads->map[i] = atoi(namelist[i]->d_name);
+ threads->nr = items;
+ }
+
+ for (i=0; i<items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+ struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+
+ if (threads != NULL) {
+ threads->map[0] = tid;
+ threads->nr = 1;
+ }
+
+ return threads;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+ if (pid != -1)
+ return thread_map__new_by_pid(pid);
+ return thread_map__new_by_tid(tid);
+}
+
+void thread_map__delete(struct thread_map *threads)
+{
+ free(threads);
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000000..3cb907311409
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,15 @@
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+
+struct thread_map {
+ int nr;
+ int map[];
+};
+
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+void thread_map__delete(struct thread_map *threads);
+#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000000..75cfe4d45119
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that file for further copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "cpumap.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include <inttypes.h>
+
+/*
+ * Ordering weight: count-1 * count-2 * ... / count-n
+ *
+ * Starts from sym->snap_count and returns it unchanged unless
+ * top->display_weighted is set. Otherwise the weight is multiplied by every
+ * count[] slot from index 1 up to, but not including, index
+ * nr_entries - 1, and is finally divided by the count of the slot the loop
+ * stopped at, plus one so that a zero count does not divide by zero.
+ *
+ * NOTE(review): with fewer than two events the loop body never runs and
+ * counter stays at 1, so count[1] is read - confirm display_weighted is
+ * only enabled when at least two events are configured.
+ */
+static double sym_weight(const struct sym_entry *sym, struct perf_top *top)
+{
+ double weight = sym->snap_count;
+ int counter;
+
+ if (!top->display_weighted)
+ return weight;
+
+ for (counter = 1; counter < top->evlist->nr_entries - 1; counter++)
+ weight *= sym->count[counter];
+
+ /* counter now indexes the slot the loop stopped at. */
+ weight /= (sym->count[counter] + 1);
+
+ return weight;
+}
+
+/* Unlink @syme from its list while holding top->active_symbols_lock. */
+static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme)
+{
+ pthread_mutex_lock(&top->active_symbols_lock);
+ list_del_init(&syme->node);
+ pthread_mutex_unlock(&top->active_symbols_lock);
+}
+
+/*
+ * Insert @se into @tree keyed by se->weight: an entry heavier than the node
+ * being compared descends to the left, an equal or lighter one to the right.
+ */
+static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+{
+ struct rb_node **p = &tree->rb_node;
+ struct rb_node *parent = NULL;
+ struct sym_entry *iter;
+
+ /* Walk down to the empty link where the new node belongs. */
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct sym_entry, rb_node);
+
+ if (se->weight > iter->weight)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&se->rb_node, parent, p);
+ rb_insert_color(&se->rb_node, tree);
+}
+
+/*
+ * snprintf() wrapper whose value is capped at @size, so callers can keep
+ * accumulating "ret += SNPRINTF(bf + ret, size - ret, ...)" without the
+ * running offset ever exceeding the buffer size.
+ */
+#define SNPRINTF(buf, size, fmt, args...) \
+({ \
+ size_t r = snprintf(buf, size, fmt, ## args); \
+ r > size ? size : r; \
+})
+
+/*
+ * Format the perf top header line into @bf, using at most @size bytes:
+ * the sample rate with kernel and exact percentages (plus user and guest
+ * figures when perf_guest is set), the sampled event name(s), the target
+ * pid/tid or "all", and the CPU list or CPU count.
+ *
+ * Returns the accumulated length, each step being capped by SNPRINTF().
+ *
+ * NOTE(review): the rates divide by top->delay_secs and the exact
+ * percentage by top->samples; nothing here guards against either being
+ * zero - confirm against callers.
+ * NOTE(review): "size - 30" below wraps around if @size is smaller than 30.
+ */
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+ struct perf_evsel *counter;
+ float samples_per_sec = top->samples / top->delay_secs;
+ float ksamples_per_sec = top->kernel_samples / top->delay_secs;
+ float esamples_percent = (100.0 * top->exact_samples) / top->samples;
+ size_t ret = 0;
+
+ if (!perf_guest) {
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ } else {
+ float us_samples_per_sec = top->us_samples / top->delay_secs;
+ float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+ float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
+ " guest kernel:%4.1f%% guest us:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_kernel_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_us_samples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ }
+
+ /* The sample period (or frequency) of the first event in the list. */
+ if (top->evlist->nr_entries == 1 || !top->display_weighted) {
+ struct perf_evsel *first;
+ first = list_entry(top->evlist->entries.next, struct perf_evsel, node);
+ ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
+ (uint64_t)first->attr.sample_period,
+ top->freq ? "Hz" : "");
+ }
+
+ if (!top->display_weighted) {
+ ret += SNPRINTF(bf + ret, size - ret, "%s",
+ event_name(top->sym_evsel));
+ } else {
+ /*
+ * Don't let events eat all the space. Leaving 30 bytes
+ * for the rest should be enough.
+ */
+ size_t last_pos = size - 30;
+
+ list_for_each_entry(counter, &top->evlist->entries, node) {
+ ret += SNPRINTF(bf + ret, size - ret, "%s%s",
+ counter->idx ? "/" : "",
+ event_name(counter));
+ if (ret > last_pos) {
+ /* Truncate the event list and mark it with "..". */
+ sprintf(bf + last_pos - 3, "..");
+ ret = last_pos - 1;
+ break;
+ }
+ }
+ }
+
+ ret += SNPRINTF(bf + ret, size - ret, "], ");
+
+ if (top->target_pid != -1)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
+ top->target_pid);
+ else if (top->target_tid != -1)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
+ top->target_tid);
+ else
+ ret += SNPRINTF(bf + ret, size - ret, " (all");
+
+ if (top->cpu_list)
+ ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
+ top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
+ else {
+ if (top->target_tid != -1)
+ ret += SNPRINTF(bf + ret, size - ret, ")");
+ else
+ ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
+ top->evlist->cpus->nr,
+ top->evlist->cpus->nr > 1 ? "s" : "");
+ }
+
+ return ret;
+}
+
+/* Zero all of the sample counters kept in @top. */
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+	top->guest_us_samples = 0;
+	top->guest_kernel_samples = 0;
+	top->exact_samples = 0;
+	top->kernel_samples = 0;
+	top->us_samples = 0;
+	top->samples = 0;
+}
+
+/*
+ * Walk top->active_symbols, snapshot each entry's count for the selected
+ * counter (top->sym_counter, or slot 0 when display_weighted is set) and
+ * insert into @root the entries whose snapshot reaches top->count_filter.
+ *
+ * Entries with a zero snapshot, or whose origin is hidden through
+ * hide_user_symbols/hide_kernel_symbols, are removed from the active list.
+ * Every entry that is kept has all of its counts either zeroed (top->zero)
+ * or scaled by 7/8.
+ *
+ * Returns the sum of the snapshots of the entries that were kept.
+ */
+float perf_top__decay_samples(struct perf_top *top, struct rb_root *root)
+{
+ struct sym_entry *syme, *n;
+ float sum_ksamples = 0.0;
+ int snap = !top->display_weighted ? top->sym_counter : 0, j;
+
+ /* Sort the active symbols */
+ /* Only the fetch of the first list entry is done under the lock. */
+ pthread_mutex_lock(&top->active_symbols_lock);
+ syme = list_entry(top->active_symbols.next, struct sym_entry, node);
+ pthread_mutex_unlock(&top->active_symbols_lock);
+
+ top->rb_entries = 0;
+ list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) {
+ syme->snap_count = syme->count[snap];
+ if (syme->snap_count != 0) {
+
+ if ((top->hide_user_symbols &&
+ syme->origin == PERF_RECORD_MISC_USER) ||
+ (top->hide_kernel_symbols &&
+ syme->origin == PERF_RECORD_MISC_KERNEL)) {
+ perf_top__remove_active_sym(top, syme);
+ continue;
+ }
+ syme->weight = sym_weight(syme, top);
+
+ if ((int)syme->snap_count >= top->count_filter) {
+ rb_insert_active_sym(root, syme);
+ ++top->rb_entries;
+ }
+ /* Counted even when the entry is below count_filter. */
+ sum_ksamples += syme->snap_count;
+
+ for (j = 0; j < top->evlist->nr_entries; j++)
+ syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8;
+ } else
+ perf_top__remove_active_sym(top, syme);
+ }
+
+ return sum_ksamples;
+}
+
+/*
+ * Find the longest symbol name that will be displayed
+ *
+ * Also reports the longest DSO long and short name lengths. All three
+ * outputs start at 0 and only grow for nodes of @root that are within the
+ * first top->print_entries visited and whose snap_count reaches
+ * top->count_filter. Nodes rejected by the filter still count towards
+ * print_entries, because the counter is incremented before the test.
+ */
+void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
+ int *dso_width, int *dso_short_width, int *sym_width)
+{
+ struct rb_node *nd;
+ int printed = 0;
+
+ *sym_width = *dso_width = *dso_short_width = 0;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
+ struct symbol *sym = sym_entry__symbol(syme);
+
+ if (++printed > top->print_entries ||
+ (int)syme->snap_count < top->count_filter)
+ continue;
+
+ if (syme->map->dso->long_name_len > *dso_width)
+ *dso_width = syme->map->dso->long_name_len;
+
+ if (syme->map->dso->short_name_len > *dso_short_width)
+ *dso_short_width = syme->map->dso->short_name_len;
+
+ if (sym->namelen > *sym_width)
+ *sym_width = sym->namelen;
+ }
+}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000000..96d1cb78af01
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,66 @@
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "types.h"
+#include "../perf.h"
+#include <stddef.h>
+#include <pthread.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct perf_evlist;
+struct perf_evsel;
+
+/* Per-symbol sampling state used by perf top. */
+struct sym_entry {
+ struct rb_node rb_node; /* link in the weight-ordered tree */
+ struct list_head node; /* link in perf_top::active_symbols */
+ unsigned long snap_count; /* copy of one count[] slot taken when decaying */
+ double weight; /* ordering key computed from the counts */
+ int skip;
+ u8 origin; /* compared against PERF_RECORD_MISC_USER/KERNEL */
+ struct map *map;
+ unsigned long count[0]; /* trailing array, indexed by event number */
+};
+
+/*
+ * Return the struct symbol found symbol_conf.priv_size bytes after @self.
+ * NOTE(review): presumably the sym_entry occupies the symbol's private
+ * area, placed immediately before the symbol - confirm against the
+ * allocator.
+ */
+static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
+{
+ return ((void *)self) + symbol_conf.priv_size;
+}
+
+/* State shared by the perf top sampling and display code. */
+struct perf_top {
+ struct perf_evlist *evlist; /* the events being sampled */
+ /*
+ * Symbols will be added here in perf_event__process_sample and will
+ * get out after decayed.
+ */
+ struct list_head active_symbols;
+ pthread_mutex_t active_symbols_lock;
+ pthread_cond_t active_symbols_cond;
+ /* Sample counters, all cleared by perf_top__reset_sample_counters(). */
+ u64 samples;
+ u64 kernel_samples, us_samples;
+ u64 exact_samples;
+ u64 guest_us_samples, guest_kernel_samples;
+ /* delay_secs is the divisor for the per-second rates in the header. */
+ int print_entries, count_filter, delay_secs;
+ /* sym_counter indexes sym_entry::count[] when display_weighted is 0. */
+ int display_weighted, freq, rb_entries, sym_counter;
+ pid_t target_pid, target_tid; /* -1 means "not set" in the header */
+ bool hide_kernel_symbols, hide_user_symbols, zero;
+ const char *cpu_list;
+ struct sym_entry *sym_filter_entry;
+ struct perf_evsel *sym_evsel; /* event named in the unweighted header */
+};
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+float perf_top__decay_samples(struct perf_top *top, struct rb_root *root);
+void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
+ int *dso_width, int *dso_short_width, int *sym_width);
+
+/* When built with NO_NEWT_SUPPORT the TUI browser is a stub returning 0. */
+#ifdef NO_NEWT_SUPPORT
+static inline int perf_top__tui_browser(struct perf_top *top __used)
+{
+ return 0;
+}
+#else
+int perf_top__tui_browser(struct perf_top *top);
+#endif
+#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 73a02223c629..d8e622dd738a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -153,7 +153,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
char *next = NULL;
char *addr_str;
char ch;
- int ret;
+ int ret __used;
int i;
line = strtok_r(file, "\n", &next);
diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c
index 8bc010edca25..611219f80680 100644
--- a/tools/perf/util/ui/browser.c
+++ b/tools/perf/util/ui/browser.c
@@ -1,4 +1,5 @@
#include "libslang.h"
+#include "ui.h"
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/rbtree.h>
@@ -156,6 +157,20 @@ void ui_browser__add_exit_keys(struct ui_browser *self, int keys[])
}
}
+/*
+ * Write @title on screen row 0, padded to browser->width, using the root
+ * colorset. Takes no lock itself; ui_browser__show_title() is the variant
+ * that wraps this call in ui__lock.
+ */
+void __ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+ SLsmg_gotorc(0, 0);
+ ui_browser__set_color(browser, NEWT_COLORSET_ROOT);
+ slsmg_write_nstring(title, browser->width);
+}
+
+/* Locked variant: draws the title while holding ui__lock. */
+void ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+ pthread_mutex_lock(&ui__lock);
+ __ui_browser__show_title(browser, title);
+ pthread_mutex_unlock(&ui__lock);
+}
+
int ui_browser__show(struct ui_browser *self, const char *title,
const char *helpline, ...)
{
@@ -178,9 +193,8 @@ int ui_browser__show(struct ui_browser *self, const char *title,
if (self->sb == NULL)
return -1;
- SLsmg_gotorc(0, 0);
- ui_browser__set_color(self, NEWT_COLORSET_ROOT);
- slsmg_write_nstring(title, self->width);
+ pthread_mutex_lock(&ui__lock);
+ __ui_browser__show_title(self, title);
ui_browser__add_exit_keys(self, keys);
newtFormAddComponent(self->form, self->sb);
@@ -188,25 +202,30 @@ int ui_browser__show(struct ui_browser *self, const char *title,
va_start(ap, helpline);
ui_helpline__vpush(helpline, ap);
va_end(ap);
+ pthread_mutex_unlock(&ui__lock);
return 0;
}
void ui_browser__hide(struct ui_browser *self)
{
+ pthread_mutex_lock(&ui__lock);
newtFormDestroy(self->form);
self->form = NULL;
ui_helpline__pop();
+ pthread_mutex_unlock(&ui__lock);
}
int ui_browser__refresh(struct ui_browser *self)
{
int row;
+ pthread_mutex_lock(&ui__lock);
newtScrollbarSet(self->sb, self->index, self->nr_entries - 1);
row = self->refresh(self);
ui_browser__set_color(self, HE_COLORSET_NORMAL);
SLsmg_fill_region(self->y + row, self->x,
self->height - row, self->width, ' ');
+ pthread_mutex_unlock(&ui__lock);
return 0;
}
diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h
index 0dc7e4da36f5..fc63dda10910 100644
--- a/tools/perf/util/ui/browser.h
+++ b/tools/perf/util/ui/browser.h
@@ -24,7 +24,6 @@ struct ui_browser {
u32 nr_entries;
};
-
void ui_browser__set_color(struct ui_browser *self, int color);
void ui_browser__set_percent_color(struct ui_browser *self,
double percent, bool current);
@@ -35,6 +34,8 @@ void ui_browser__reset_index(struct ui_browser *self);
void ui_browser__gotorc(struct ui_browser *self, int y, int x);
void ui_browser__add_exit_key(struct ui_browser *self, int key);
void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]);
+void __ui_browser__show_title(struct ui_browser *browser, const char *title);
+void ui_browser__show_title(struct ui_browser *browser, const char *title);
int ui_browser__show(struct ui_browser *self, const char *title,
const char *helpline, ...);
void ui_browser__hide(struct ui_browser *self);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 82b78f99251b..8c17a8730e4a 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -1,9 +1,12 @@
#include "../browser.h"
#include "../helpline.h"
#include "../libslang.h"
+#include "../../annotate.h"
#include "../../hist.h"
#include "../../sort.h"
#include "../../symbol.h"
+#include "../../annotate.h"
+#include <pthread.h>
static void ui__error_window(const char *fmt, ...)
{
@@ -42,8 +45,6 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
struct objdump_line_rb_node *olrb = objdump_line__rb(ol);
ui_browser__set_percent_color(self, olrb->percent, current_entry);
slsmg_printf(" %7.2f ", olrb->percent);
- if (!current_entry)
- ui_browser__set_color(self, HE_COLORSET_CODE);
} else {
ui_browser__set_percent_color(self, 0, current_entry);
slsmg_write_nstring(" ", 9);
@@ -55,35 +56,40 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
slsmg_write_nstring(" ", width - 18);
else
slsmg_write_nstring(ol->line, width - 18);
+
+ if (!current_entry)
+ ui_browser__set_color(self, HE_COLORSET_CODE);
}
static double objdump_line__calc_percent(struct objdump_line *self,
- struct list_head *head,
- struct symbol *sym)
+ struct symbol *sym, int evidx)
{
double percent = 0.0;
if (self->offset != -1) {
int len = sym->end - sym->start;
unsigned int hits = 0;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_ext *sym_ext = priv->ext;
- struct sym_hist *h = priv->hist;
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_line *src_line = notes->src->lines;
+ struct sym_hist *h = annotation__histogram(notes, evidx);
s64 offset = self->offset;
- struct objdump_line *next = objdump__get_next_ip_line(head, self);
-
+ struct objdump_line *next;
+ next = objdump__get_next_ip_line(&notes->src->source, self);
while (offset < (s64)len &&
(next == NULL || offset < next->offset)) {
- if (sym_ext) {
- percent += sym_ext[offset].percent;
+ if (src_line) {
+ percent += src_line[offset].percent;
} else
- hits += h->ip[offset];
+ hits += h->addr[offset];
++offset;
}
-
- if (sym_ext == NULL && h->sum)
+ /*
+ * If the percentage wasn't already calculated in
+ * symbol__get_source_line, do it now:
+ */
+ if (src_line == NULL && h->sum)
percent = 100.0 * hits / h->sum;
}
@@ -133,103 +139,161 @@ static void annotate_browser__set_top(struct annotate_browser *self,
self->curr_hot = nd;
}
-static int annotate_browser__run(struct annotate_browser *self)
+static void annotate_browser__calc_percent(struct annotate_browser *browser,
+ int evidx)
{
- struct rb_node *nd;
- struct hist_entry *he = self->b.priv;
- int key;
+ struct symbol *sym = browser->b.priv;
+ struct annotation *notes = symbol__annotation(sym);
+ struct objdump_line *pos;
- if (ui_browser__show(&self->b, he->ms.sym->name,
- "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
- return -1;
+ browser->entries = RB_ROOT;
+
+ pthread_mutex_lock(&notes->lock);
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ struct objdump_line_rb_node *rbpos = objdump_line__rb(pos);
+ rbpos->percent = objdump_line__calc_percent(pos, sym, evidx);
+ if (rbpos->percent < 0.01) {
+ RB_CLEAR_NODE(&rbpos->rb_node);
+ continue;
+ }
+ objdump__insert_line(&browser->entries, rbpos);
+ }
+ pthread_mutex_unlock(&notes->lock);
+
+ browser->curr_hot = rb_last(&browser->entries);
+}
+
+static int annotate_browser__run(struct annotate_browser *self, int evidx,
+ int refresh)
+{
+ struct rb_node *nd = NULL;
+ struct symbol *sym = self->b.priv;
/*
- * To allow builtin-annotate to cycle thru multiple symbols by
+ * RIGHT To allow builtin-annotate to cycle thru multiple symbols by
* examining the exit key for this function.
*/
- ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT);
+ int exit_keys[] = { 'H', NEWT_KEY_TAB, NEWT_KEY_UNTAB,
+ NEWT_KEY_RIGHT, 0 };
+ int key;
+
+ if (ui_browser__show(&self->b, sym->name,
+ "<-, -> or ESC: exit, TAB/shift+TAB: "
+ "cycle hottest lines, H: Hottest") < 0)
+ return -1;
+
+ ui_browser__add_exit_keys(&self->b, exit_keys);
+ annotate_browser__calc_percent(self, evidx);
+
+ if (self->curr_hot)
+ annotate_browser__set_top(self, self->curr_hot);
nd = self->curr_hot;
- if (nd) {
- int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 };
- ui_browser__add_exit_keys(&self->b, tabs);
- }
+
+ if (refresh != 0)
+ newtFormSetTimer(self->b.form, refresh);
while (1) {
key = ui_browser__run(&self->b);
+ if (refresh != 0) {
+ annotate_browser__calc_percent(self, evidx);
+ /*
+ * Current line focus got out of the list of most active
+ * lines, NULL it so that if TAB|UNTAB is pressed, we
+ * move to curr_hot (current hottest line).
+ */
+ if (nd != NULL && RB_EMPTY_NODE(nd))
+ nd = NULL;
+ }
+
switch (key) {
+ case -1:
+ /*
+ * FIXME we need to check if it was
+ * es.reason == NEWT_EXIT_TIMER
+ */
+ if (refresh != 0)
+ symbol__annotate_decay_histogram(sym, evidx);
+ continue;
case NEWT_KEY_TAB:
- nd = rb_prev(nd);
- if (nd == NULL)
- nd = rb_last(&self->entries);
- annotate_browser__set_top(self, nd);
+ if (nd != NULL) {
+ nd = rb_prev(nd);
+ if (nd == NULL)
+ nd = rb_last(&self->entries);
+ } else
+ nd = self->curr_hot;
break;
case NEWT_KEY_UNTAB:
- nd = rb_next(nd);
- if (nd == NULL)
- nd = rb_first(&self->entries);
- annotate_browser__set_top(self, nd);
+ if (nd != NULL)
+ nd = rb_next(nd);
+ if (nd == NULL)
+ nd = rb_first(&self->entries);
+ else
+ nd = self->curr_hot;
+ break;
+ case 'H':
+ nd = self->curr_hot;
break;
default:
goto out;
}
+
+ if (nd != NULL)
+ annotate_browser__set_top(self, nd);
}
out:
ui_browser__hide(&self->b);
return key;
}
-int hist_entry__tui_annotate(struct hist_entry *self)
+/*
+ * Annotate the symbol and map of @he for event index @evidx, passing a
+ * refresh value of 0 to symbol__tui_annotate().
+ */
+int hist_entry__tui_annotate(struct hist_entry *he, int evidx)
+{
+ return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx, 0);
+}
+
+int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx,
+ int refresh)
{
struct objdump_line *pos, *n;
- struct objdump_line_rb_node *rbpos;
- LIST_HEAD(head);
+ struct annotation *notes = symbol__annotation(sym);
struct annotate_browser browser = {
.b = {
- .entries = &head,
+ .entries = &notes->src->source,
.refresh = ui_browser__list_head_refresh,
.seek = ui_browser__list_head_seek,
.write = annotate_browser__write,
- .priv = self,
+ .priv = sym,
},
};
int ret;
- if (self->ms.sym == NULL)
+ if (sym == NULL)
return -1;
- if (self->ms.map->dso->annotate_warned)
+ if (map->dso->annotate_warned)
return -1;
- if (hist_entry__annotate(self, &head, sizeof(*rbpos)) < 0) {
+ if (symbol__annotate(sym, map, sizeof(struct objdump_line_rb_node)) < 0) {
ui__error_window(ui_helpline__last_msg);
return -1;
}
ui_helpline__push("Press <- or ESC to exit");
- list_for_each_entry(pos, &head, node) {
+ list_for_each_entry(pos, &notes->src->source, node) {
+ struct objdump_line_rb_node *rbpos;
size_t line_len = strlen(pos->line);
+
if (browser.b.width < line_len)
browser.b.width = line_len;
rbpos = objdump_line__rb(pos);
rbpos->idx = browser.b.nr_entries++;
- rbpos->percent = objdump_line__calc_percent(pos, &head, self->ms.sym);
- if (rbpos->percent < 0.01)
- continue;
- objdump__insert_line(&browser.entries, rbpos);
}
- /*
- * Position the browser at the hottest line.
- */
- browser.curr_hot = rb_last(&browser.entries);
- if (browser.curr_hot)
- annotate_browser__set_top(&browser, browser.curr_hot);
-
browser.b.width += 18; /* Percentage */
- ret = annotate_browser__run(&browser);
- list_for_each_entry_safe(pos, n, &head, node) {
+ ret = annotate_browser__run(&browser, evidx, refresh);
+ list_for_each_entry_safe(pos, n, &notes->src->source, node) {
list_del(&pos->node);
objdump_line__free(pos);
}
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 60c463c16028..798efdca3ead 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -7,6 +7,8 @@
#include <newt.h>
#include <linux/rbtree.h>
+#include "../../evsel.h"
+#include "../../evlist.h"
#include "../../hist.h"
#include "../../pstack.h"
#include "../../sort.h"
@@ -292,7 +294,8 @@ static int hist_browser__run(struct hist_browser *self, const char *title)
{
int key;
int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't',
- NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, };
+ NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT,
+ NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0, };
self->b.entries = &self->hists->entries;
self->b.nr_entries = self->hists->nr_entries;
@@ -377,7 +380,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
- u64 cumul = cumul_hits(child);
+ u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
@@ -638,6 +641,9 @@ static void ui_browser__hists_seek(struct ui_browser *self,
struct rb_node *nd;
bool first = true;
+ if (self->nr_entries == 0)
+ return;
+
switch (whence) {
case SEEK_SET:
nd = hists__filter_entries(rb_first(self->entries));
@@ -797,8 +803,11 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
return printed;
}
-int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
+static int perf_evsel__hists_browse(struct perf_evsel *evsel,
+ const char *helpline, const char *ev_name,
+ bool left_exits)
{
+ struct hists *self = &evsel->hists;
struct hist_browser *browser = hist_browser__new(self);
struct pstack *fstack;
const struct thread *thread_filter = NULL;
@@ -818,8 +827,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
hists__browser_title(self, msg, sizeof(msg), ev_name,
dso_filter, thread_filter);
while (1) {
- const struct thread *thread;
- const struct dso *dso;
+ const struct thread *thread = NULL;
+ const struct dso *dso = NULL;
char *options[16];
int nr_options = 0, choice = 0, i,
annotate = -2, zoom_dso = -2, zoom_thread = -2,
@@ -827,8 +836,10 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
key = hist_browser__run(browser, msg);
- thread = hist_browser__selected_thread(browser);
- dso = browser->selection->map ? browser->selection->map->dso : NULL;
+ if (browser->he_selection != NULL) {
+ thread = hist_browser__selected_thread(browser);
+ dso = browser->selection->map ? browser->selection->map->dso : NULL;
+ }
switch (key) {
case NEWT_KEY_TAB:
@@ -839,7 +850,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
*/
goto out_free_stack;
case 'a':
- if (browser->selection->map == NULL &&
+ if (browser->selection == NULL ||
+ browser->selection->map == NULL ||
browser->selection->map->dso->annotate_warned)
continue;
goto do_annotate;
@@ -858,6 +870,7 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
"E Expand all callchains\n"
"d Zoom into current DSO\n"
"t Zoom into current Thread\n"
+ "TAB/UNTAB Switch events\n"
"q/CTRL+C Exit browser");
continue;
case NEWT_KEY_ENTER:
@@ -867,8 +880,14 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
case NEWT_KEY_LEFT: {
const void *top;
- if (pstack__empty(fstack))
+ if (pstack__empty(fstack)) {
+ /*
+ * Go back to the perf_evsel_menu__run or other user
+ */
+ if (left_exits)
+ goto out_free_stack;
continue;
+ }
top = pstack__pop(fstack);
if (top == &dso_filter)
goto zoom_out_dso;
@@ -877,14 +896,16 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
continue;
}
case NEWT_KEY_ESCAPE:
- if (!ui__dialog_yesno("Do you really want to exit?"))
+ if (!left_exits &&
+ !ui__dialog_yesno("Do you really want to exit?"))
continue;
/* Fall thru */
default:
goto out_free_stack;
}
- if (browser->selection->sym != NULL &&
+ if (browser->selection != NULL &&
+ browser->selection->sym != NULL &&
!browser->selection->map->dso->annotate_warned &&
asprintf(&options[nr_options], "Annotate %s",
browser->selection->sym->name) > 0)
@@ -903,7 +924,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
(dso->kernel ? "the Kernel" : dso->short_name)) > 0)
zoom_dso = nr_options++;
- if (browser->selection->map != NULL &&
+ if (browser->selection != NULL &&
+ browser->selection->map != NULL &&
asprintf(&options[nr_options], "Browse map details") > 0)
browse_map = nr_options++;
@@ -923,19 +945,11 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
if (choice == annotate) {
struct hist_entry *he;
do_annotate:
- if (browser->selection->map->dso->origin == DSO__ORIG_KERNEL) {
- browser->selection->map->dso->annotate_warned = 1;
- ui_helpline__puts("No vmlinux file found, can't "
- "annotate with just a "
- "kallsyms file");
- continue;
- }
-
he = hist_browser__selected_entry(browser);
if (he == NULL)
continue;
- hist_entry__tui_annotate(he);
+ hist_entry__tui_annotate(he, evsel->idx);
} else if (choice == browse_map)
map__browse(browser->selection->map);
else if (choice == zoom_dso) {
@@ -984,30 +998,141 @@ out:
return key;
}
-int hists__tui_browse_tree(struct rb_root *self, const char *help)
+/* Browser listing the events of an evlist, one row per perf_evsel. */
+struct perf_evsel_menu {
+ struct ui_browser b;
+ struct perf_evsel *selection; /* evsel of the row last drawn as current */
+};
+
+/*
+ * Row renderer for the event menu: prints the PERF_RECORD_SAMPLE count of
+ * the perf_evsel behind @entry, scaled by convert_unit(), followed by the
+ * event name. The row uses the selected colorset when it is the current
+ * entry, in which case its evsel is also stored in menu->selection.
+ */
+static void perf_evsel_menu__write(struct ui_browser *browser,
+ void *entry, int row)
+{
+ struct perf_evsel_menu *menu = container_of(browser,
+ struct perf_evsel_menu, b);
+ struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ unsigned long nr_events = evsel->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ const char *ev_name = event_name(evsel);
+ char bf[256], unit;
+
+ ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+ HE_COLORSET_NORMAL);
+
+ nr_events = convert_unit(nr_events, &unit);
+ /* A ' ' unit already acts as the separator, so no extra space is added. */
+ snprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
+ unit, unit == ' ' ? "" : " ", ev_name);
+ slsmg_write_nstring(bf, browser->width);
+
+ if (current_entry)
+ menu->selection = evsel;
+}
+
+static int perf_evsel_menu__run(struct perf_evsel_menu *menu, const char *help)
{
- struct rb_node *first = rb_first(self), *nd = first, *next;
- int key = 0;
+ int exit_keys[] = { NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
+ struct perf_evlist *evlist = menu->b.priv;
+ struct perf_evsel *pos;
+ const char *ev_name, *title = "Available samples";
+ int key;
+
+ if (ui_browser__show(&menu->b, title,
+ "ESC: exit, ENTER|->: Browse histograms") < 0)
+ return -1;
+
+ ui_browser__add_exit_keys(&menu->b, exit_keys);
- while (nd) {
- struct hists *hists = rb_entry(nd, struct hists, rb_node);
- const char *ev_name = __event_name(hists->type, hists->config);
+ while (1) {
+ key = ui_browser__run(&menu->b);
- key = hists__browse(hists, help, ev_name);
switch (key) {
- case NEWT_KEY_TAB:
- next = rb_next(nd);
- if (next)
- nd = next;
+ case NEWT_KEY_RIGHT:
+ case NEWT_KEY_ENTER:
+ if (!menu->selection)
+ continue;
+ pos = menu->selection;
+browse_hists:
+ ev_name = event_name(pos);
+ key = perf_evsel__hists_browse(pos, help, ev_name, true);
+ ui_browser__show_title(&menu->b, title);
break;
- case NEWT_KEY_UNTAB:
- if (nd == first)
+ case NEWT_KEY_LEFT:
+ continue;
+ case NEWT_KEY_ESCAPE:
+ if (!ui__dialog_yesno("Do you really want to exit?"))
continue;
- nd = rb_prev(nd);
+ /* Fall thru */
+ default:
+ goto out;
+ }
+
+ switch (key) {
+ case NEWT_KEY_TAB:
+ if (pos->node.next == &evlist->entries)
+ pos = list_entry(evlist->entries.next, struct perf_evsel, node);
+ else
+ pos = list_entry(pos->node.next, struct perf_evsel, node);
+ goto browse_hists;
+ case NEWT_KEY_UNTAB:
+ if (pos->node.prev == &evlist->entries)
+ pos = list_entry(evlist->entries.prev, struct perf_evsel, node);
+ else
+ pos = list_entry(pos->node.prev, struct perf_evsel, node);
+ goto browse_hists;
+ case 'q':
+ case CTRL('c'):
+ goto out;
default:
- return key;
+ break;
}
}
+out:
+ ui_browser__hide(&menu->b);
return key;
}
+
+/*
+ * Set up an event menu over evlist->entries and run it. The menu width is
+ * grown to the longest event name plus 7 columns, and each evsel that has
+ * no name yet gets a strdup() of its event name cached in pos->name.
+ *
+ * Returns whatever perf_evsel_menu__run() returns.
+ */
+static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
+ const char *help)
+{
+ struct perf_evsel *pos;
+ struct perf_evsel_menu menu = {
+ .b = {
+ .entries = &evlist->entries,
+ .refresh = ui_browser__list_head_refresh,
+ .seek = ui_browser__list_head_seek,
+ .write = perf_evsel_menu__write,
+ .nr_entries = evlist->nr_entries,
+ .priv = evlist,
+ },
+ };
+
+ ui_helpline__push("Press ESC to exit");
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ const char *ev_name = event_name(pos);
+ size_t line_len = strlen(ev_name) + 7;
+
+ if (menu.b.width < line_len)
+ menu.b.width = line_len;
+ /*
+ * Cache the evsel name, tracepoints have a _high_ cost per
+ * event_name() call.
+ */
+ if (pos->name == NULL)
+ pos->name = strdup(ev_name);
+ }
+
+ return perf_evsel_menu__run(&menu, help);
+}
+
+/*
+ * Entry point for browsing the histograms of @evlist: with exactly one
+ * event its histogram browser is opened directly (left_exits == false),
+ * otherwise the event menu is shown first.
+ */
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help)
+{
+
+ if (evlist->nr_entries == 1) {
+ struct perf_evsel *first = list_entry(evlist->entries.next,
+ struct perf_evsel, node);
+ const char *ev_name = event_name(first);
+ return perf_evsel__hists_browse(first, help, ev_name, false);
+ }
+
+ return __perf_evlist__tui_browse_hists(evlist, help);
+}
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index e5158369106e..8462bffe20bc 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -41,7 +41,7 @@ static int ui_entry__read(const char *title, char *bf, size_t size, int width)
out_free_form:
newtPopWindow();
newtFormDestroy(form);
- return 0;
+ return err;
}
struct map_browser {
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c
new file mode 100644
index 000000000000..5a06538532af
--- /dev/null
+++ b/tools/perf/util/ui/browsers/top.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "../browser.h"
+#include "../../annotate.h"
+#include "../helpline.h"
+#include "../libslang.h"
+#include "../util.h"
+#include "../../evlist.h"
+#include "../../hist.h"
+#include "../../sort.h"
+#include "../../symbol.h"
+#include "../../top.h"
+
+/*
+ * State for the 'perf top' TUI browser.  The generic ui_browser is embedded
+ * as 'b' so that callbacks can get back to this struct via container_of().
+ */
+struct perf_top_browser {
+ struct ui_browser b;
+ /* Tree of entries to display, reset and refilled on every update */
+ struct rb_root root;
+ /* Entry on the current row, recorded by perf_top_browser__write() */
+ struct sym_entry *selection;
+ /* Value returned by perf_top__decay_samples(), the base for percentages */
+ float sum_ksamples;
+ /* Column widths, filled in by perf_top__find_widths() */
+ int dso_width;
+ int dso_short_width;
+ int sym_width;
+};
+
+/*
+ * ui_browser ->write() callback: print one sym_entry on screen row @row.
+ *
+ * @entry is the rb_node embedded in the sym_entry to print.  The line holds
+ * the weight (and the sample count in the weighted multi-event case), the
+ * percentage of samples, optionally the symbol start address, the symbol
+ * name and finally the DSO name.
+ */
+static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row)
+{
+ struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b);
+ struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ struct symbol *symbol = sym_entry__symbol(syme);
+ struct perf_top *top = browser->priv;
+ int width = browser->width;
+ double pcnt;
+
+ /* Share of sum_ksamples that belongs to this entry, in percent */
+ pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) /
+ top_browser->sum_ksamples));
+ ui_browser__set_percent_color(browser, pcnt, current_entry);
+
+ /* 'width' tracks the columns left for the remaining fields */
+ if (top->evlist->nr_entries == 1 || !top->display_weighted) {
+ slsmg_printf("%20.2f ", syme->weight);
+ width -= 24;
+ } else {
+ slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count);
+ width -= 23;
+ }
+
+ slsmg_printf("%4.1f%%", pcnt);
+ width -= 7;
+
+ if (verbose) {
+ slsmg_printf(" %016" PRIx64, symbol->start);
+ width -= 17;
+ }
+
+ /* Symbol name, padded and truncated to exactly sym_width columns */
+ slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width,
+ symbol->name);
+ width -= top_browser->sym_width;
+ /* Use the long DSO name only if it fits in what is left of the line */
+ slsmg_write_nstring(width >= syme->map->dso->long_name_len ?
+ syme->map->dso->long_name :
+ syme->map->dso->short_name, width);
+
+ /* Remember the entry under the cursor for the annotate action */
+ if (current_entry)
+ top_browser->selection = syme;
+}
+
+/*
+ * Rebuild the tree shown by the browser from the current samples, then
+ * recompute the column widths and the ui_browser scroll state to match it.
+ *
+ * Returns early, leaving the scroll state untouched, when there are no
+ * active symbols.
+ */
+static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser)
+{
+	struct perf_top *top = browser->b.priv;
+	u64 top_idx = browser->b.top_idx;
+
+	/* Start from an empty tree and forget the cached top-of-screen node. */
+	browser->root = RB_ROOT;
+	browser->b.top = NULL;
+	browser->sum_ksamples = perf_top__decay_samples(top, &browser->root);
+	/*
+	 * No active symbols
+	 */
+	if (top->rb_entries == 0)
+		return;
+
+	perf_top__find_widths(top, &browser->root, &browser->dso_width,
+			      &browser->dso_short_width,
+			      &browser->sym_width);
+	/*
+	 * 29 columns are reserved for the rest of the line (presumably the
+	 * numeric columns printed by the ->write() callback -- confirm).  If
+	 * symbol + DSO do not fit, first fall back to the short DSO name and
+	 * then shrink the symbol column.
+	 */
+	if (browser->sym_width + browser->dso_width > browser->b.width - 29) {
+		browser->dso_width = browser->dso_short_width;
+		if (browser->sym_width + browser->dso_width > browser->b.width - 29)
+			browser->sym_width = browser->b.width - browser->dso_width - 29;
+	}
+
+	/*
+	 * Adjust the ui_browser indexes since the entries in the browser->root
+	 * rb_tree may have changed, then seek it from start, so that we get a
+	 * possible new top of the screen.
+	 */
+	browser->b.nr_entries = top->rb_entries;
+
+	if (top_idx >= browser->b.nr_entries) {
+		/*
+		 * The old top row no longer exists.  Show the last full page
+		 * when there are more entries than rows, otherwise everything
+		 * fits and we start at the first entry.  The subtraction is
+		 * only done when it cannot wrap around.
+		 */
+		if (browser->b.nr_entries > browser->b.height)
+			top_idx = browser->b.nr_entries - browser->b.height;
+		else
+			top_idx = 0;
+	}
+
+	/* Keep the cursor at the same offset from the (possibly moved) top. */
+	if (browser->b.index >= top_idx + browser->b.height)
+		browser->b.index = top_idx + browser->b.index - browser->b.top_idx;
+
+	/* rb_entries != 0 was checked above, so nr_entries - 1 is valid. */
+	if (browser->b.index >= browser->b.nr_entries)
+		browser->b.index = browser->b.nr_entries - 1;
+
+	browser->b.top_idx = top_idx;
+	browser->b.seek(&browser->b, top_idx, SEEK_SET);
+}
+
+/*
+ * Open the annotation browser for the currently selected entry.
+ *
+ * If the symbol has no annotation source yet (notes->src == NULL), a
+ * histogram is allocated for it first and the entry becomes
+ * top->sym_filter_entry.  On allocation failure an error is printed and
+ * nothing is annotated.  Callers must make sure browser->selection is set.
+ */
+static void perf_top_browser__annotate(struct perf_top_browser *browser)
+{
+ struct sym_entry *syme = browser->selection;
+ struct symbol *sym = sym_entry__symbol(syme);
+ struct annotation *notes = symbol__annotation(sym);
+ struct perf_top *top = browser->b.priv;
+
+ /* NOTE(review): notes->src is tested before notes->lock is taken -- confirm this is safe */
+ if (notes->src != NULL)
+ goto do_annotation;
+
+ pthread_mutex_lock(&notes->lock);
+
+ /* Cleared while the histogram is allocated, set again only on success */
+ top->sym_filter_entry = NULL;
+
+ if (symbol__alloc_hist(sym, top->evlist->nr_entries) < 0) {
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ pthread_mutex_unlock(&notes->lock);
+ return;
+ }
+
+ top->sym_filter_entry = syme;
+
+ pthread_mutex_unlock(&notes->lock);
+do_annotation:
+ /* Last argument is the 'perf top' delay converted to milliseconds */
+ symbol__tui_annotate(sym, syme->map, 0, top->delay_secs * 1000);
+}
+
+/*
+ * Show the browser and process keys until the user leaves.
+ *
+ * Returns -1 if the browser could not be shown, otherwise the key that
+ * ended the loop.
+ */
+static int perf_top_browser__run(struct perf_top_browser *browser)
+{
+ int key;
+ char title[160];
+ struct perf_top *top = browser->b.priv;
+ int delay_msecs = top->delay_secs * 1000;
+ /* Extra keys, zero terminated, registered with the browser below */
+ int exit_keys[] = { 'a', NEWT_KEY_ENTER, NEWT_KEY_RIGHT, 0, };
+
+ /* Populate the tree and the title before the first draw */
+ perf_top_browser__update_rb_tree(browser);
+ perf_top__header_snprintf(top, title, sizeof(title));
+ perf_top__reset_sample_counters(top);
+
+ if (ui_browser__show(&browser->b, title,
+ "ESC: exit, ENTER|->|a: Live Annotate") < 0)
+ return -1;
+
+ newtFormSetTimer(browser->b.form, delay_msecs);
+ ui_browser__add_exit_keys(&browser->b, exit_keys);
+
+ while (1) {
+ key = ui_browser__run(&browser->b);
+
+ switch (key) {
+ case -1:
+ /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */
+ /* Treated as a refresh: rebuild the tree and redraw the title line */
+ perf_top_browser__update_rb_tree(browser);
+ perf_top__header_snprintf(top, title, sizeof(title));
+ perf_top__reset_sample_counters(top);
+ ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT);
+ SLsmg_gotorc(0, 0);
+ slsmg_write_nstring(title, browser->b.width);
+ break;
+ case 'a':
+ case NEWT_KEY_RIGHT:
+ case NEWT_KEY_ENTER:
+ /* Annotate only when some entry has been recorded as selected */
+ if (browser->selection)
+ perf_top_browser__annotate(browser);
+ break;
+ case NEWT_KEY_LEFT:
+ /* Left arrow is ignored here, it does not leave the browser */
+ continue;
+ case NEWT_KEY_ESCAPE:
+ /* Stay in the browser unless the user confirms */
+ if (!ui__dialog_yesno("Do you really want to exit?"))
+ continue;
+ /* Fall thru */
+ default:
+ goto out;
+ }
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+/*
+ * Entry point of the 'perf top' TUI: set up a browser over an rb_tree of
+ * sym_entry nodes for @top and run it.
+ *
+ * Returns the value of perf_top_browser__run(): -1 if the browser could not
+ * be shown, otherwise the key that made the user leave.
+ */
+int perf_top__tui_browser(struct perf_top *top)
+{
+	struct perf_top_browser browser = {
+		.b = {
+			/* The tree lives in this same struct, see ->root */
+			.entries = &browser.root,
+			.refresh = ui_browser__rb_tree_refresh,
+			.seek	 = ui_browser__rb_tree_seek,
+			.write	 = perf_top_browser__write,
+			.priv	 = top,
+		},
+	};
+
+	/*
+	 * Only ESC is advertised: the key loop in perf_top_browser__run()
+	 * ignores the left arrow, so it does not exit this browser.
+	 */
+	ui_helpline__push("Press ESC to exit");
+	return perf_top_browser__run(&browser);
+}
diff --git a/tools/perf/util/ui/helpline.c b/tools/perf/util/ui/helpline.c
index 8d79daa4458a..f36d2ff509ed 100644
--- a/tools/perf/util/ui/helpline.c
+++ b/tools/perf/util/ui/helpline.c
@@ -5,6 +5,7 @@
#include "../debug.h"
#include "helpline.h"
+#include "ui.h"
void ui_helpline__pop(void)
{
@@ -55,7 +56,8 @@ int ui_helpline__show_help(const char *format, va_list ap)
int ret;
static int backlog;
- ret = vsnprintf(ui_helpline__last_msg + backlog,
+ pthread_mutex_lock(&ui__lock);
+ ret = vsnprintf(ui_helpline__last_msg + backlog,
sizeof(ui_helpline__last_msg) - backlog, format, ap);
backlog += ret;
@@ -64,6 +66,7 @@ int ui_helpline__show_help(const char *format, va_list ap)
newtRefresh();
backlog = 0;
}
+ pthread_mutex_unlock(&ui__lock);
return ret;
}
diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/util/ui/libslang.h
index 5623da8e8080..2b63e1c9b181 100644
--- a/tools/perf/util/ui/libslang.h
+++ b/tools/perf/util/ui/libslang.h
@@ -13,11 +13,11 @@
#if SLANG_VERSION < 20104
#define slsmg_printf(msg, args...) \
- SLsmg_printf((char *)msg, ##args)
+ SLsmg_printf((char *)(msg), ##args)
#define slsmg_write_nstring(msg, len) \
- SLsmg_write_nstring((char *)msg, len)
+ SLsmg_write_nstring((char *)(msg), len)
#define sltt_set_color(obj, name, fg, bg) \
- SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg)
+ SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
#else
#define slsmg_printf SLsmg_printf
#define slsmg_write_nstring SLsmg_write_nstring
diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c
index 662085032eb7..ee46d671db59 100644
--- a/tools/perf/util/ui/setup.c
+++ b/tools/perf/util/ui/setup.c
@@ -6,6 +6,9 @@
#include "../debug.h"
#include "browser.h"
#include "helpline.h"
+#include "ui.h"
+
+pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
static void newt_suspend(void *d __used)
{
@@ -14,11 +17,12 @@ static void newt_suspend(void *d __used)
newtResume();
}
-void setup_browser(void)
+void setup_browser(bool fallback_to_pager)
{
if (!isatty(1) || !use_browser || dump_trace) {
use_browser = 0;
- setup_pager();
+ if (fallback_to_pager)
+ setup_pager();
return;
}
diff --git a/tools/perf/util/ui/ui.h b/tools/perf/util/ui/ui.h
new file mode 100644
index 000000000000..d264e059c829
--- /dev/null
+++ b/tools/perf/util/ui/ui.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_UI_H_
+#define _PERF_UI_H_ 1
+
+#include <pthread.h>
+
+/*
+ * Lock defined in setup.c.  It is held around the newt calls made by
+ * ui_helpline__show_help() and ui__warning().
+ */
+extern pthread_mutex_t ui__lock;
+
+#endif /* _PERF_UI_H_ */
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 7b5a8926624e..fdf1fc8f08bc 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -9,6 +9,7 @@
#include "../debug.h"
#include "browser.h"
#include "helpline.h"
+#include "ui.h"
#include "util.h"
static void newt_form__set_exit_keys(newtComponent self)
@@ -118,10 +119,12 @@ void ui__warning(const char *format, ...)
va_list args;
va_start(args, format);
- if (use_browser > 0)
+ if (use_browser > 0) {
+ pthread_mutex_lock(&ui__lock);
newtWinMessagev((char *)warning_str, (char *)ok,
(char *)format, args);
- else
+ pthread_mutex_unlock(&ui__lock);
+ } else
vfprintf(stderr, format, args);
va_end(args);
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e833f26f3bfc..fc784284ac8b 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -70,9 +70,7 @@
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
-#ifndef NO_SYS_SELECT_H
#include <sys/select.h>
-#endif
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
@@ -83,10 +81,6 @@
#include "types.h"
#include <sys/ttydefaults.h>
-#ifndef NO_ICONV
-#include <iconv.h>
-#endif
-
extern const char *graph_line;
extern const char *graph_dotted_line;
extern char buildid_dir[];
@@ -236,26 +230,6 @@ static inline int sane_case(int x, int high)
return x;
}
-#ifndef DIR_HAS_BSD_GROUP_SEMANTICS
-# define FORCE_DIR_SET_GID S_ISGID
-#else
-# define FORCE_DIR_SET_GID 0
-#endif
-
-#ifdef NO_NSEC
-#undef USE_NSEC
-#define ST_CTIME_NSEC(st) 0
-#define ST_MTIME_NSEC(st) 0
-#else
-#ifdef USE_ST_TIMESPEC
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec))
-#else
-#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec))
-#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec))
-#endif
-#endif
-
int mkdir_p(char *path, mode_t mode);
int copyfile(const char *from, const char *to);
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index e1c62eeb88f5..ba7c63af6f3b 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
#
-# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
# Licensed under the terms of the GNU GPL License version 2
#