diff options
Diffstat (limited to 'arch')
44 files changed, 368 insertions, 222 deletions
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index e4ed1bc9a734..a6810923b3f0 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1377,29 +1377,32 @@ do { \ #define __write_64bit_c0_split(source, sel, val) \ do { \ + unsigned long long __tmp; \ unsigned long __flags; \ \ local_irq_save(__flags); \ if (sel == 0) \ __asm__ __volatile__( \ ".set\tmips64\n\t" \ - "dsll\t%L0, %L0, 32\n\t" \ + "dsll\t%L0, %L1, 32\n\t" \ "dsrl\t%L0, %L0, 32\n\t" \ - "dsll\t%M0, %M0, 32\n\t" \ + "dsll\t%M0, %M1, 32\n\t" \ "or\t%L0, %L0, %M0\n\t" \ "dmtc0\t%L0, " #source "\n\t" \ ".set\tmips0" \ - : : "r" (val)); \ + : "=&r,r" (__tmp) \ + : "r,0" (val)); \ else \ __asm__ __volatile__( \ ".set\tmips64\n\t" \ - "dsll\t%L0, %L0, 32\n\t" \ + "dsll\t%L0, %L1, 32\n\t" \ "dsrl\t%L0, %L0, 32\n\t" \ - "dsll\t%M0, %M0, 32\n\t" \ + "dsll\t%M0, %M1, 32\n\t" \ "or\t%L0, %L0, %M0\n\t" \ "dmtc0\t%L0, " #source ", " #sel "\n\t" \ ".set\tmips0" \ - : : "r" (val)); \ + : "=&r,r" (__tmp) \ + : "r,0" (val)); \ local_irq_restore(__flags); \ } while (0) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 9e6c74bf66c4..6668f67a61c3 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -618,8 +618,7 @@ static int mipspmu_event_init(struct perf_event *event) return -ENOENT; } - if ((unsigned int)event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) + if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; if (!atomic_inc_not_zero(&active_events)) { diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c index 1c02f5737367..35a671595a37 100644 --- a/arch/mips/pci/fixup-capcella.c +++ b/arch/mips/pci/fixup-capcella.c @@ -38,7 +38,7 @@ static char irq_tab_capcella[][5] __initdata = { [14] = { -1, INTA, INTB, INTC, INTD } }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_capcella[slot][pin]; } diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c index b3ab59318d91..62810d3fe99b 100644 --- a/arch/mips/pci/fixup-cobalt.c +++ b/arch/mips/pci/fixup-cobalt.c @@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = { [COBALT_PCICONF_ETH1] = ETH1_IRQ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (cobalt_board_id <= COBALT_BRD_ID_QUBE1) return irq_tab_qube1[slot]; diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c index 19caf775c206..4832ac9f118a 100644 --- a/arch/mips/pci/fixup-emma2rh.c +++ b/arch/mips/pci/fixup-emma2rh.c @@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH, emma2rh_pci_host_fixup); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_map[slot][pin]; } diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c index 50da773faede..b47c2771dc99 100644 --- a/arch/mips/pci/fixup-fuloong2e.c +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -19,7 +19,7 @@ /* South bridge slot number is set by the pci probe process */ static u8 sb_slot = 5; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = 0; diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c index 133685e215ee..ea29f5450be3 100644 --- a/arch/mips/pci/fixup-ip32.c +++ b/arch/mips/pci/fixup-ip32.c @@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = { * irqs. I suppose a device without a pin A will thank us for doing it * right if there exists such a broken piece of crap. */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_mace[slot][pin]; } diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 2b5427d3f35c..81530a13b349 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c index 95ab9a1bd010..7e5991e0e323 100644 --- a/arch/mips/pci/fixup-lemote2f.c +++ b/arch/mips/pci/fixup-lemote2f.c @@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = { {0, 0, 0, 0, 0}, /* 27: Unused */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c index 2b6d5e196f99..8a741c2c6685 100644 --- a/arch/mips/pci/fixup-loongson3.c +++ b/arch/mips/pci/fixup-loongson3.c @@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev) pdev->vendor, pdev->device, pdev->irq); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { print_fixup_info(dev); return dev->irq; diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c index 40e920c653cc..1f5f25e39590 100644 --- a/arch/mips/pci/fixup-malta.c +++ b/arch/mips/pci/fixup-malta.c @@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = { {0, PCID, PCIA, PCIB, PCIC } /* 21: PCI Slot 4 */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; virq = irq_tab[slot][pin]; diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c index 8e4f8288eca2..5da62c76e271 100644 --- a/arch/mips/pci/fixup-mpc30x.c +++ b/arch/mips/pci/fixup-mpc30x.c @@ -34,7 +34,7 @@ static const int irq_tab_mpc30x[] __initconst = { [29] = MQ200_IRQ, }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (slot == 30) return internal_func_irqs[PCI_FUNC(dev->devfn)]; diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c index fab405c21c2f..f2b7b1e4395b 100644 --- a/arch/mips/pci/fixup-pmcmsp.c +++ b/arch/mips/pci/fixup-pmcmsp.c @@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) * RETURNS: IRQ number * ****************************************************************************/ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL) printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n"); diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index f67ebeeb4200..309e1c562959 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -130,7 +130,7 @@ static inline int is_rm300_revd(void) return (csmsr & 0xa0) == 0x20; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (sni_brd_type) { case SNI_BRD_PCI_TOWER_CPLUS: diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c index d0b0083fbd27..cc581535f257 100644 --- a/arch/mips/pci/fixup-tb0219.c +++ b/arch/mips/pci/fixup-tb0219.c @@ -23,7 +23,7 @@ #include <asm/vr41xx/tb0219.h> -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c index 4196ccf3ea3d..b827b5cad5fd 100644 --- a/arch/mips/pci/fixup-tb0226.c +++ b/arch/mips/pci/fixup-tb0226.c @@ -23,7 +23,7 @@ #include <asm/vr41xx/giu.h> #include <asm/vr41xx/tb0226.h> -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c index 8c5039ed75d7..98f26285f2e3 100644 --- a/arch/mips/pci/fixup-tb0287.c +++ b/arch/mips/pci/fixup-tb0287.c @@ -22,7 +22,7 @@ #include <asm/vr41xx/tb0287.h> -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char bus; int irq = -1; diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index e99ca7702d8a..f15ec98de2de 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void) arch_initcall(alchemy_pci_init); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct alchemy_pci_context *ctx = dev->sysdata; if (ctx && ctx->board_map_irq) diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c index 76f16eaed0ad..230d7dd273e2 100644 --- a/arch/mips/pci/pci-bcm47xx.c +++ b/arch/mips/pci/pci-bcm47xx.c @@ -28,7 +28,7 @@ #include <linux/bcma/bcma.h> #include <bcm47xx.h> -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return 0; } diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c index 40d2797d2bc4..47f4ee6bbb3b 100644 --- a/arch/mips/pci/pci-lasat.c +++ b/arch/mips/pci/pci-lasat.c @@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup); #define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) #define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (slot) { case 1: diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index 4e633c1e7ff3..90fba9bf98da 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; u32 val; diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 9ee01936862e..771f5de6362d 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -59,7 +59,7 @@ union octeon_pci_address { } s; }; -int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev, +int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; @@ -74,7 +74,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; * as it goes through each bridge. * Returns Interrupt number for the device */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (octeon_pcibios_map_irq) return octeon_pcibios_map_irq(dev, slot, pin); diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index d6360fe73d05..711cdccdf65b 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) spin_unlock_irqrestore(&rt2880_pci_lock, flags); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; int irq = -1; diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 04f8ea953297..958899ffe99c 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -564,7 +564,7 @@ err_put_intc_node: return err; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c index 7babf01600cb..9eff9137f78e 100644 --- a/arch/mips/pci/pci-xlp.c +++ b/arch/mips/pci/pci-xlp.c @@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev) return PCI_SLOT(lnkdev->devfn) / 8; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct pci_dev *lnkdev; int lnkfunc, node; diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 26d2dabef281..2a1c81a129ba 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d) } } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return get_irq_vector(dev); } diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c index ffa0f7101a97..2b08242ade62 100644 --- a/arch/mips/pmcs-msp71xx/msp_smp.c +++ b/arch/mips/pmcs-msp71xx/msp_smp.c @@ -22,6 +22,8 @@ #include <linux/smp.h> #include <linux/interrupt.h> +#include <asm/setup.h> + #ifdef CONFIG_MIPS_MT_SMP #define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */ #define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index dce708e061ea..20e75a2ca93a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1507,7 +1507,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, static inline void pmdp_invalidate(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ca8cd80e8feb..60181caf8e8a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -404,18 +404,6 @@ static inline void save_vector_registers(void) #endif } -static int __init topology_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && !enabled) - S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY; - return rc; -} -early_param("topology", topology_setup); - static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c1bf75ffb875..7e1e40323b78 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -823,9 +823,12 @@ static int cpumsf_pmu_event_init(struct perf_event *event) } /* Check online status of the CPU to which the event is pinned */ - if ((unsigned int)event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) - return -ENODEV; + if (event->cpu >= 0) { + if ((unsigned int)event->cpu >= nr_cpumask_bits) + return -ENODEV; + if (!cpu_online(event->cpu)) + return -ENODEV; + } /* Force reset of idle/hv excludes regardless of what the * user requested. diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bb47c92476f0..ed0bdd220e1a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -8,6 +8,8 @@ #include <linux/workqueue.h> #include <linux/bootmem.h> +#include <linux/uaccess.h> +#include <linux/sysctl.h> #include <linux/cpuset.h> #include <linux/device.h> #include <linux/export.h> @@ -29,12 +31,20 @@ #define PTF_VERTICAL (1UL) #define PTF_CHECK (2UL) +enum { + TOPOLOGY_MODE_HW, + TOPOLOGY_MODE_SINGLE, + TOPOLOGY_MODE_PACKAGE, + TOPOLOGY_MODE_UNINITIALIZED +}; + struct mask_info { struct mask_info *next; unsigned char id; cpumask_t mask; }; +static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; @@ -59,11 +69,26 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) cpumask_t mask; cpumask_copy(&mask, cpumask_of(cpu)); - if (!MACHINE_HAS_TOPOLOGY) - return mask; - for (; info; info = info->next) { - if (cpumask_test_cpu(cpu, &info->mask)) - return info->mask; + switch (topology_mode) { + case TOPOLOGY_MODE_HW: + while (info) { + if (cpumask_test_cpu(cpu, &info->mask)) { + mask = info->mask; + break; + } + info = info->next; + } + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); + break; + case TOPOLOGY_MODE_PACKAGE: + cpumask_copy(&mask, cpu_present_mask); + break; + default: + /* fallthrough */ + case TOPOLOGY_MODE_SINGLE: + cpumask_copy(&mask, cpumask_of(cpu)); + break; } return mask; } @@ -74,7 +99,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu) int i; cpumask_copy(&mask, cpumask_of(cpu)); - if (!MACHINE_HAS_TOPOLOGY) + if (topology_mode != TOPOLOGY_MODE_HW) return mask; cpu -= cpu % (smp_cpu_mtid + 1); for (i = 0; i <= smp_cpu_mtid; i++) @@ -184,10 +209,8 @@ static void topology_update_polarization_simple(void) { int cpu; - mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); - mutex_unlock(&smp_cpu_state_mutex); } static int ptf(unsigned long fc) @@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc) static void update_cpu_masks(void) { struct cpu_topology_s390 *topo; - int cpu; + int cpu, id; for_each_possible_cpu(cpu) { topo = &cpu_topology[cpu]; @@ -231,12 +254,13 @@ static void update_cpu_masks(void) topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); topo->drawer_mask = cpu_group_map(&drawer_info, cpu); - if (!MACHINE_HAS_TOPOLOGY) { + if (topology_mode != TOPOLOGY_MODE_HW) { + id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; topo->thread_id = cpu; topo->core_id = cpu; - topo->socket_id = cpu; - topo->book_id = cpu; - topo->drawer_id = cpu; + topo->socket_id = id; + topo->book_id = id; + topo->drawer_id = id; if (cpu_present(cpu)) cpumask_set_cpu(cpu, &cpus_with_topology); } @@ -254,6 +278,7 @@ static int __arch_update_cpu_topology(void) struct sysinfo_15_1_x *info = tl_info; int rc = 0; + mutex_lock(&smp_cpu_state_mutex); cpumask_clear(&cpus_with_topology); if (MACHINE_HAS_TOPOLOGY) { rc = 1; @@ -263,6 +288,7 @@ static int __arch_update_cpu_topology(void) update_cpu_masks(); if (!MACHINE_HAS_TOPOLOGY) topology_update_polarization_simple(); + mutex_unlock(&smp_cpu_state_mutex); return rc; } @@ -289,6 +315,11 @@ void topology_schedule_update(void) schedule_work(&topology_work); } +static void topology_flush_work(void) +{ + flush_work(&topology_work); +} + static void topology_timer_fn(unsigned long ignored) { if (ptf(PTF_CHECK)) @@ -459,6 +490,12 @@ void __init topology_init_early(void) struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); + if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { + if (MACHINE_HAS_TOPOLOGY) + topology_mode = TOPOLOGY_MODE_HW; + else + topology_mode = TOPOLOGY_MODE_SINGLE; + } if (!MACHINE_HAS_TOPOLOGY) goto out; tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE); @@ -474,12 +511,97 @@ out: __arch_update_cpu_topology(); } +static inline int topology_get_mode(int enabled) +{ + if (!enabled) + return TOPOLOGY_MODE_SINGLE; + return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; +} + +static inline int topology_is_enabled(void) +{ + return topology_mode != TOPOLOGY_MODE_SINGLE; +} + +static int __init topology_setup(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + topology_mode = topology_get_mode(enabled); + return 0; +} +early_param("topology", topology_setup); + +static int topology_ctl_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned int len; + int new_mode; + char buf[2]; + + if (!*lenp || *ppos) { + *lenp = 0; + return 0; + } + if (!write) { + strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", + ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + goto out; + } + len = *lenp; + if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + if (buf[0] != '0' && buf[0] != '1') + return -EINVAL; + mutex_lock(&smp_cpu_state_mutex); + new_mode = topology_get_mode(buf[0] == '1'); + if (topology_mode != new_mode) { + topology_mode = new_mode; + topology_schedule_update(); + } + mutex_unlock(&smp_cpu_state_mutex); + topology_flush_work(); +out: + *lenp = len; + *ppos += len; + return 0; +} + +static struct ctl_table topology_ctl_table[] = { + { + .procname = "topology", + .mode = 0644, + .proc_handler = topology_ctl_handler, + }, + { }, +}; + +static struct ctl_table topology_dir_table[] = { + { + .procname = "s390", + .maxlen = 0, + .mode = 0555, + .child = topology_ctl_table, + }, + { }, +}; + static int __init topology_init(void) { if (MACHINE_HAS_TOPOLOGY) set_topology_timer(); else topology_update_polarization_simple(); + register_sysctl_table(topology_dir_table); return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 8ecc25e760fa..98ffe3ee9411 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask, result; struct page *head, *page; + unsigned long mask; int refs; - result = write ? 0 : _SEGMENT_ENTRY_PROTECT; - mask = result | _SEGMENT_ENTRY_INVALID; - if ((pmd_val(pmd) & mask) != result) + mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; + if ((pmd_val(pmd) & mask) != 0) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 246c67006ed0..8c1fcb6bad21 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -33,7 +33,7 @@ #define s3 ((16 + 2 + (3 * 256)) * 4) /* register macros */ -#define CTX %rdi +#define CTX %r12 #define RIO %rsi #define RX0 %rax @@ -56,12 +56,12 @@ #define RX2bh %ch #define RX3bh %dh -#define RT0 %rbp +#define RT0 %rdi #define RT1 %rsi #define RT2 %r8 #define RT3 %r9 -#define RT0d %ebp +#define RT0d %edi #define RT1d %esi #define RT2d %r8d #define RT3d %r9d @@ -120,13 +120,14 @@ ENTRY(__blowfish_enc_blk) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk) round_enc(14); add_roundkey_enc(16); - movq %r11, %rbp; + movq %r11, %r12; movq %r10, RIO; test %cl, %cl; @@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk) ENTRY(blowfish_dec_blk) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk) movq %r10, RIO; write_block(); - movq %r11, %rbp; + movq %r11, %r12; ret; ENDPROC(blowfish_dec_blk) @@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk) ENTRY(__blowfish_enc_blk_4way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - pushq %rbp; + pushq %r12; pushq %rbx; pushq %rcx; - preload_roundkey_enc(0); - + movq %rdi, CTX movq %rsi, %r11; movq %rdx, RIO; + preload_roundkey_enc(0); + read_block4(); round_enc4(0); @@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way) round_enc4(14); add_preloaded_roundkey4(); - popq %rbp; + popq %r12; movq %r11, RIO; - test %bpl, %bpl; + test %r12b, %r12b; jnz .L__enc_xor4; write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; .L__enc_xor4: xor_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(__blowfish_enc_blk_4way) ENTRY(blowfish_dec_blk_4way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ - pushq %rbp; + pushq %r12; pushq %rbx; - preload_roundkey_dec(17); - movq %rsi, %r11; + movq %rdi, CTX; + movq %rsi, %r11 movq %rdx, RIO; + preload_roundkey_dec(17); read_block4(); round_dec4(17); @@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way) write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 310319c601ed..95ba6956a7f6 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -75,17 +75,17 @@ #define RCD1bh %dh #define RT0 %rsi -#define RT1 %rbp +#define RT1 %r12 #define RT2 %r8 #define RT0d %esi -#define RT1d %ebp +#define RT1d %r12d #define RT2d %r8d #define RT2bl %r8b #define RXOR %r9 -#define RRBP %r10 +#define RR12 %r10 #define RDST %r11 #define RXORd %r9d @@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk) * %rdx: src * %rcx: bool xor */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; @@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk) enc_outunpack(mov, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; .L__enc_xor: enc_outunpack(xor, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(__camellia_enc_blk) @@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk) movl $24, RXORd; cmovel RXORd, RT2d; /* max */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk) dec_outunpack(); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(camellia_dec_blk) @@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way) */ pushq %rbx; - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; movq %rdx, RIO; @@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way) enc_outunpack2(mov, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; .L__enc2_xor: enc_outunpack2(xor, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; ENDPROC(__camellia_enc_blk_2way) @@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way) cmovel RXORd, RT2d; /* max */ movq %rbx, RXOR; - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way) dec_outunpack2(); - movq RRBP, %rbp; + movq RR12, %r12; movq RXOR, %rbx; ret; ENDPROC(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b4a8806234ea..86107c961bb4 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -47,7 +47,7 @@ /********************************************************************** 16-way AVX cast5 **********************************************************************/ -#define CTX %rdi +#define CTX %r15 #define RL1 %xmm0 #define RR1 %xmm1 @@ -70,8 +70,8 @@ #define RTMP %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %rdi +#define RID1d %edi #define RID2 %rsi #define RID2d %esi @@ -226,7 +226,7 @@ .align 16 __cast5_enc_blk16: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RL1: blocks 1 and 2 * RR1: blocks 3 and 4 * RL2: blocks 5 and 6 @@ -246,9 +246,11 @@ __cast5_enc_blk16: * RR4: encrypted blocks 15 and 16 */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -283,7 +285,7 @@ __cast5_enc_blk16: .L__skip_enc: popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; @@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16) .align 16 __cast5_dec_blk16: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RL1: encrypted blocks 1 and 2 * RR1: encrypted blocks 3 and 4 * RL2: encrypted blocks 5 and 6 @@ -318,9 +320,11 @@ __cast5_dec_blk16: * RR4: decrypted blocks 15 and 16 */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -356,7 +360,7 @@ __cast5_dec_blk16: vmovdqa .Lbswap_mask, RKM; popq %rbx; - popq %rbp; + popq %r15; outunpack_blocks(RR1, RL1, RTMP, RX, RKM); outunpack_blocks(RR2, RL2, RTMP, RX, RKM); @@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16) ENTRY(cast5_ecb_enc_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; vmovdqu (0*4*4)(%rdx), RL1; @@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way) vmovdqu RR4, (6*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11); + popq %r15; FRAME_END ret; ENDPROC(cast5_ecb_enc_16way) ENTRY(cast5_ecb_dec_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; movq %rsi, %r11; vmovdqu (0*4*4)(%rdx), RL1; @@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way) vmovdqu RR4, (6*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11); + popq %r15; FRAME_END ret; ENDPROC(cast5_ecb_dec_16way) ENTRY(cast5_cbc_dec_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way) vmovdqu RR4, (6*16)(%r11); vmovdqu RL4, (7*16)(%r11); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast5_cbc_dec_16way) ENTRY(cast5_ctr_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: iv (big endian, 64bit) */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way) vmovdqu RR4, (6*16)(%r11); vmovdqu RL4, (7*16)(%r11); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 952d3156a933..7f30b6f0d72c 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -47,7 +47,7 @@ /********************************************************************** 8-way AVX cast6 **********************************************************************/ -#define CTX %rdi +#define CTX %r15 #define RA1 %xmm0 #define RB1 %xmm1 @@ -70,8 +70,8 @@ #define RTMP %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %rdi +#define RID1d %edi #define RID2 %rsi #define RID2d %esi @@ -264,15 +264,17 @@ .align 8 __cast6_enc_blk8: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -297,7 +299,7 @@ __cast6_enc_blk8: QBAR(11); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; @@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8) .align 8 __cast6_dec_blk8: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -343,7 +347,7 @@ __cast6_dec_blk8: QBAR(0); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); @@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8) ENTRY(cast6_ecb_enc_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_enc_8way) ENTRY(cast6_ecb_dec_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_dec_8way) ENTRY(cast6_cbc_dec_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way) store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_cbc_dec_8way) @@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way) * %rcx: iv (little endian, 128bit) */ FRAME_BEGIN - pushq %r12; + pushq %r15 + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way) store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_ctr_8way) @@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX movq %rsi, %r11; /* regs <= src, dst <= IVs, regs <= regs xor IVs */ @@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way) /* dst <= regs xor IVs(in dst) */ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_xts_enc_8way) @@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX movq %rsi, %r11; /* regs <= src, dst <= IVs, regs <= regs xor IVs */ @@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way) /* dst <= regs xor IVs(in dst) */ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_xts_dec_8way) diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index f3e91647ca27..8e49ce117494 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -64,12 +64,12 @@ #define RW2bh %ch #define RT0 %r15 -#define RT1 %rbp +#define RT1 %rsi #define RT2 %r14 #define RT3 %rdx #define RT0d %r15d -#define RT1d %ebp +#define RT1d %esi #define RT2d %r14d #define RT3d %edx @@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk) * %rsi: dst * %rdx: src */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi; /* dst */ + read_block(%rdx, RL0, RR0); initial_permutation(RL0, RR0); @@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk) round1(32+15, RL0, RR0, dummy2); final_permutation(RR0, RL0); + + popq %rsi /* dst */ write_block(%rsi, RR0, RL0); popq %r15; @@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk) @@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) * %rdx: src (3 blocks) */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi /* dst */ + /* load input */ movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; @@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) bswapl RR2d; bswapl RL2d; + popq %rsi /* dst */ movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); @@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1eab79c9ac48..9f712a7dfd79 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -89,7 +89,7 @@ #define REG_RE %rdx #define REG_RTA %r12 #define REG_RTB %rbx -#define REG_T1 %ebp +#define REG_T1 %r11d #define xmm_mov vmovups #define avx2_zeroupper vzeroupper #define RND_F1 1 @@ -637,7 +637,6 @@ _loop3: ENTRY(\name) push %rbx - push %rbp push %r12 push %r13 push %r14 @@ -673,7 +672,6 @@ _loop3: pop %r14 pop %r13 pop %r12 - pop %rbp pop %rbx ret diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index a4109506a5e8..6204bd53528c 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -37,7 +37,7 @@ #define REG_A %ecx #define REG_B %esi #define REG_C %edi -#define REG_D %ebp +#define REG_D %r12d #define REG_E %edx #define REG_T1 %eax @@ -74,10 +74,10 @@ ENTRY(\name) push %rbx - push %rbp push %r12 + push %rbp + mov %rsp, %rbp - mov %rsp, %r12 sub $64, %rsp # allocate workspace and $~15, %rsp # align stack @@ -99,10 +99,9 @@ xor %rax, %rax rep stosq - mov %r12, %rsp # deallocate workspace - - pop %r12 + mov %rbp, %rsp # deallocate workspace pop %rbp + pop %r12 pop %rbx ret diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index e08888a1a5f2..001bbcf93c79 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP c = %ecx d = %r8d e = %edx -TBL = %rbp +TBL = %r12 a = %eax b = %ebx @@ -350,13 +350,13 @@ a = TMP_ ENTRY(sha256_transform_avx) .align 32 pushq %rbx - pushq %rbp + pushq %r12 pushq %r13 pushq %r14 pushq %r15 - pushq %r12 + pushq %rbp + movq %rsp, %rbp - mov %rsp, %r12 subq $STACK_SIZE, %rsp # allocate stack space and $~15, %rsp # align stack pointer @@ -452,13 +452,12 @@ loop2: done_hash: - mov %r12, %rsp - - popq %r12 + mov %rbp, %rsp + popq %rbp popq %r15 popq %r14 popq %r13 - popq %rbp + popq %r12 popq %rbx ret ENDPROC(sha256_transform_avx) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 89c8f09787d2..1420db15dcdd 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -98,8 +98,6 @@ d = %r8d e = %edx # clobbers NUM_BLKS y3 = %esi # clobbers INP - -TBL = %rbp SRND = CTX # SRND is same register as CTX a = %eax @@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE ENTRY(sha256_transform_rorx) .align 32 pushq %rbx - pushq %rbp pushq %r12 pushq %r13 pushq %r14 @@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx) mov CTX, _CTX(%rsp) loop0: - lea K256(%rip), TBL - ## Load first 16 dwords from two blocks VMOVDQ 0*32(INP),XTMP0 VMOVDQ 1*32(INP),XTMP1 @@ -597,19 +592,19 @@ last_block_enter: .align 16 loop1: - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X0, XFER + vpaddd K256+1*32(SRND), X0, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 1*32 - vpaddd 2*32(TBL, SRND), X0, XFER + vpaddd K256+2*32(SRND), X0, XFER vmovdqa XFER, 2*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 2*32 - vpaddd 3*32(TBL, SRND), X0, XFER + vpaddd K256+3*32(SRND), X0, XFER vmovdqa XFER, 3*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 3*32 @@ -619,10 +614,11 @@ loop1: loop2: ## Do last 16 rounds with no scheduling - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X1, XFER + + vpaddd K256+1*32(SRND), X1, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 1*32 add $2*32, SRND @@ -676,9 +672,6 @@ loop3: ja done_hash do_last_block: - #### do last block - lea K256(%rip), TBL - VMOVDQ 0*16(INP),XWORD0 VMOVDQ 1*16(INP),XWORD1 VMOVDQ 2*16(INP),XWORD2 @@ -718,7 +711,6 @@ done_hash: popq %r14 popq %r13 popq %r12 - popq %rbp popq %rbx ret ENDPROC(sha256_transform_rorx) diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 39b83c93e7fd..c6c05ed2c16a 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP c = %ecx d = %r8d e = %edx -TBL = %rbp +TBL = %r12 a = %eax b = %ebx @@ -356,13 +356,13 @@ a = TMP_ ENTRY(sha256_transform_ssse3) .align 32 pushq %rbx - pushq %rbp + pushq %r12 pushq %r13 pushq %r14 pushq %r15 - pushq %r12 + pushq %rbp + mov %rsp, %rbp - mov %rsp, %r12 subq $STACK_SIZE, %rsp and $~15, %rsp @@ -462,13 +462,12 @@ loop2: done_hash: - mov %r12, %rsp - - popq %r12 + mov %rbp, %rsp + popq %rbp popq %r15 popq %r14 popq %r13 - popq %rbp + popq %r12 popq %rbx ret diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 7f5f6c6ec72e..b16d56005162 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -69,8 +69,9 @@ XFER = YTMP0 BYTE_FLIP_MASK = %ymm9 -# 1st arg -CTX = %rdi +# 1st arg is %rdi, which is saved to the stack and accessed later via %r12 +CTX1 = %rdi +CTX2 = %r12 # 2nd arg INP = %rsi # 3rd arg @@ -81,7 +82,7 @@ d = %r8 e = %rdx y3 = %rsi -TBL = %rbp +TBL = %rdi # clobbers CTX1 a = %rax b = %rbx @@ -91,26 +92,26 @@ g = %r10 h = %r11 old_h = %r11 -T1 = %r12 +T1 = %r12 # clobbers CTX2 y0 = %r13 y1 = %r14 y2 = %r15 -y4 = %r12 - # Local variables (stack frame) XFER_SIZE = 4*8 SRND_SIZE = 1*8 INP_SIZE = 1*8 INPEND_SIZE = 1*8 +CTX_SIZE = 1*8 RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 6*8 +GPRSAVE_SIZE = 5*8 frame_XFER = 0 frame_SRND = frame_XFER + XFER_SIZE frame_INP = frame_SRND + SRND_SIZE frame_INPEND = frame_INP + INP_SIZE -frame_RSPSAVE = frame_INPEND + INPEND_SIZE +frame_CTX = frame_INPEND + INPEND_SIZE +frame_RSPSAVE = frame_CTX + CTX_SIZE frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE frame_size = frame_GPRSAVE + GPRSAVE_SIZE @@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx) mov %rax, frame_RSPSAVE(%rsp) # Save GPRs - mov %rbp, frame_GPRSAVE(%rsp) - mov %rbx, 8*1+frame_GPRSAVE(%rsp) - mov %r12, 8*2+frame_GPRSAVE(%rsp) - mov %r13, 8*3+frame_GPRSAVE(%rsp) - mov %r14, 8*4+frame_GPRSAVE(%rsp) - mov %r15, 8*5+frame_GPRSAVE(%rsp) + mov %rbx, 8*0+frame_GPRSAVE(%rsp) + mov %r12, 8*1+frame_GPRSAVE(%rsp) + mov %r13, 8*2+frame_GPRSAVE(%rsp) + mov %r14, 8*3+frame_GPRSAVE(%rsp) + mov %r15, 8*4+frame_GPRSAVE(%rsp) shl $7, NUM_BLKS # convert to bytes jz done_hash @@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx) mov NUM_BLKS, frame_INPEND(%rsp) ## load initial digest - mov 8*0(CTX),a - mov 8*1(CTX),b - mov 8*2(CTX),c - mov 8*3(CTX),d - mov 8*4(CTX),e - mov 8*5(CTX),f - mov 8*6(CTX),g - mov 8*7(CTX),h + mov 8*0(CTX1), a + mov 8*1(CTX1), b + mov 8*2(CTX1), c + mov 8*3(CTX1), d + mov 8*4(CTX1), e + mov 8*5(CTX1), f + mov 8*6(CTX1), g + mov 8*7(CTX1), h + + # save %rdi (CTX) before it gets clobbered + mov %rdi, frame_CTX(%rsp) vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK @@ -652,14 +655,15 @@ loop2: subq $1, frame_SRND(%rsp) jne loop2 - addm 8*0(CTX),a - addm 8*1(CTX),b - addm 8*2(CTX),c - addm 8*3(CTX),d - addm 8*4(CTX),e - addm 8*5(CTX),f - addm 8*6(CTX),g - addm 8*7(CTX),h + mov frame_CTX(%rsp), CTX2 + addm 8*0(CTX2), a + addm 8*1(CTX2), b + addm 8*2(CTX2), c + addm 8*3(CTX2), d + addm 8*4(CTX2), e + addm 8*5(CTX2), f + addm 8*6(CTX2), g + addm 8*7(CTX2), h mov frame_INP(%rsp), INP add $128, INP @@ -669,12 +673,11 @@ loop2: done_hash: # Restore GPRs - mov frame_GPRSAVE(%rsp) ,%rbp - mov 8*1+frame_GPRSAVE(%rsp) ,%rbx - mov 8*2+frame_GPRSAVE(%rsp) ,%r12 - mov 8*3+frame_GPRSAVE(%rsp) ,%r13 - mov 8*4+frame_GPRSAVE(%rsp) ,%r14 - mov 8*5+frame_GPRSAVE(%rsp) ,%r15 + mov 8*0+frame_GPRSAVE(%rsp), %rbx + mov 8*1+frame_GPRSAVE(%rsp), %r12 + mov 8*2+frame_GPRSAVE(%rsp), %r13 + mov 8*3+frame_GPRSAVE(%rsp), %r14 + mov 8*4+frame_GPRSAVE(%rsp), %r15 # Restore Stack Pointer mov frame_RSPSAVE(%rsp), %rsp diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index b3f49d286348..73b471da3622 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -76,8 +76,8 @@ #define RT %xmm14 #define RR %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %r13 +#define RID1d %r13d #define RID2 %rsi #define RID2d %esi @@ -259,7 +259,7 @@ __twofish_enc_blk8: vmovdqu w(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; pushq %rcx; @@ -282,7 +282,7 @@ __twofish_enc_blk8: popq %rcx; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); @@ -301,7 +301,7 @@ __twofish_dec_blk8: vmovdqu (w+4*4)(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); @@ -322,7 +322,7 @@ __twofish_dec_blk8: vmovdqu (w)(CTX), RK1; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); |