diff options
196 files changed, 4449 insertions, 1947 deletions
@@ -2478,12 +2478,11 @@ S: D-90453 Nuernberg S: Germany N: Arnaldo Carvalho de Melo -E: acme@ghostprotocols.net +E: acme@kernel.org E: arnaldo.melo@gmail.com E: acme@redhat.com -W: http://oops.ghostprotocols.net:81/blog/ P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 -D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks +D: tools/, IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks S: Brazil N: Karsten Merker diff --git a/Documentation/media/uapi/v4l/pixfmt-007.rst b/Documentation/media/uapi/v4l/pixfmt-007.rst index 44bb5a7059b3..95a23a28c595 100644 --- a/Documentation/media/uapi/v4l/pixfmt-007.rst +++ b/Documentation/media/uapi/v4l/pixfmt-007.rst @@ -211,7 +211,13 @@ Colorspace sRGB (V4L2_COLORSPACE_SRGB) The :ref:`srgb` standard defines the colorspace used by most webcams and computer graphics. The default transfer function is ``V4L2_XFER_FUNC_SRGB``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is full range. +``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited range. + +Note that the :ref:`sycc` standard specifies full range quantization, +however all current capture hardware supported by the kernel convert +R'G'B' to limited range Y'CbCr. So choosing full range as the default +would break how applications interpret the quantization range. + The chromaticities of the primary colors and the white reference are: @@ -276,7 +282,7 @@ the following ``V4L2_YCBCR_ENC_601`` encoding as defined by :ref:`sycc`: Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range [-0.5…0.5]. This transform is identical to one defined in SMPTE -170M/BT.601. The Y'CbCr quantization is full range. +170M/BT.601. The Y'CbCr quantization is limited range. .. _col-adobergb: @@ -288,10 +294,15 @@ The :ref:`adobergb` standard defines the colorspace used by computer graphics that use the AdobeRGB colorspace. This is also known as the :ref:`oprgb` standard. The default transfer function is ``V4L2_XFER_FUNC_ADOBERGB``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is full -range. The chromaticities of the primary colors and the white reference -are: +``V4L2_YCBCR_ENC_601``. The default Y'CbCr quantization is limited +range. + +Note that the :ref:`oprgb` standard specifies full range quantization, +however all current capture hardware supported by the kernel convert +R'G'B' to limited range Y'CbCr. So choosing full range as the default +would break how applications interpret the quantization range. +The chromaticities of the primary colors and the white reference are: .. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}| @@ -344,7 +355,7 @@ the following ``V4L2_YCBCR_ENC_601`` encoding: Y' is clamped to the range [0…1] and Cb and Cr are clamped to the range [-0.5…0.5]. This transform is identical to one defined in SMPTE -170M/BT.601. The Y'CbCr quantization is full range. +170M/BT.601. The Y'CbCr quantization is limited range. .. _col-bt2020: diff --git a/MAINTAINERS b/MAINTAINERS index 62b0307a7ca4..d8f71f21fb88 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -877,8 +877,8 @@ S: Odd fixes F: drivers/hwmon/applesmc.c APPLETALK NETWORK LAYER -M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> -S: Maintained +L: netdev@vger.kernel.org +S: Odd fixes F: drivers/net/appletalk/ F: net/appletalk/ @@ -6748,9 +6748,8 @@ S: Odd Fixes F: drivers/tty/ipwireless/ IPX NETWORK LAYER -M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: include/net/ipx.h F: include/uapi/linux/ipx.h F: net/ipx/ @@ -7522,8 +7521,8 @@ S: Maintained F: drivers/misc/lkdtm* LLC (802.2) -M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> -S: Maintained +L: netdev@vger.kernel.org +S: Odd fixes F: include/linux/llc.h F: include/uapi/linux/llc.h F: include/net/llc* @@ -13416,10 +13415,8 @@ S: Maintained F: drivers/input/misc/wistron_btns.c WL3501 WIRELESS PCMCIA CARD DRIVER -M: Arnaldo Carvalho de Melo <acme@ghostprotocols.net> L: linux-wireless@vger.kernel.org -W: http://oops.ghostprotocols.net:81/blog -S: Maintained +S: Odd fixes F: drivers/net/wireless/wl3501* WOLFSON MICROELECTRONICS DRIVERS @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Fearless Coyote # *DOCUMENTATION* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1be64da0384e..e6cfe7ba2d65 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -104,6 +104,7 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; + __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1d3167269a67..2b4cf04239b6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,8 +309,22 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 eax, ebx, ecx, edx; - node_id = cpuid_ecx(0x8000001e) & 7; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + + node_id = ecx & 0xff; + smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + + if (c->x86 == 0x15) + c->cu_id = ebx & 0xff; + + if (c->x86 >= 0x17) { + c->cpu_core_id = ebx & 0xff; + + if (smp_num_siblings > 1) + c->x86_max_cores /= smp_num_siblings; + } /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..ede03e849a8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->cu_id = 0xff; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 46732dc3b73c..99b920d0e516 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->cpu_core_id == o->cpu_core_id) - return topology_sane(c, o, "smt"); + per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { + if (c->cpu_core_id == o->cpu_core_id) + return topology_sane(c, o, "smt"); + + if ((c->cu_id != 0xff) && + (o->cu_id != 0xff) && + (c->cu_id == o->cu_id)) + return topology_sane(c, o, "smt"); + } } else if (c->phys_proc_id == o->phys_proc_id && c->cpu_core_id == o->cpu_core_id) { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e41af597aed8..37e7cf544e51 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1356,6 +1356,9 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same @@ -1386,8 +1389,6 @@ void __init tsc_init(void) if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); - else - tsc_store_and_check_tsc_adjust(true); check_system_tsc_reliable(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index d0db011051a5..728f75378475 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu) if (unsynchronized_tsc()) return; - if (tsc_clocksource_reliable) { - if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) - pr_info( - "Skipped synchronization checks as TSC is reliable.\n"); - return; - } - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR @@ -380,14 +373,19 @@ void check_tsc_sync_target(void) int cpus = 2; /* Also aborts if there is no TSC. */ - if (unsynchronized_tsc() || tsc_clocksource_reliable) + if (unsynchronized_tsc()) return; /* * Store, verify and sanitize the TSC adjust register. If * successful skip the test. + * + * The test is also skipped when the TSC is marked reliable. This + * is true for SoCs which have no fallback clocksource. On these + * SoCs the TSC is frequency synchronized, but still the TSC ADJUST + * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false)) { + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { atomic_inc(&skip_test); return; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ea9c49adaa1f..8aa6bea1cd6c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -15,6 +15,7 @@ #include <linux/debugfs.h> #include <linux/mm.h> #include <linux/init.h> +#include <linux/sched.h> #include <linux/seq_file.h> #include <asm/pgtable.h> @@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, } else note_page(m, &st, __pgprot(0), 1); + cond_resched(); start++; } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 471ddfd93ea8..5ec109533bb9 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -2132,12 +2132,8 @@ idt77252_init_est(struct vc_map *vc, int pcr) est->interval = 2; /* XXX: make this configurable */ est->ewma_log = 2; /* XXX: make this configurable */ - init_timer(&est->timer); - est->timer.data = (unsigned long)vc; - est->timer.function = idt77252_est_timer; - - est->timer.expires = jiffies + ((HZ / 4) << est->interval); - add_timer(&est->timer); + setup_timer(&est->timer, idt77252_est_timer, (unsigned long)vc); + mod_timer(&est->timer, jiffies + ((HZ / 4) << est->interval)); return est; } @@ -3638,9 +3634,7 @@ static int idt77252_init_one(struct pci_dev *pcidev, spin_lock_init(&card->cmd_lock); spin_lock_init(&card->tst_lock); - init_timer(&card->tst_timer); - card->tst_timer.data = (unsigned long)card; - card->tst_timer.function = tst_timer; + setup_timer(&card->tst_timer, tst_timer, (unsigned long)card); /* Do the I/O remapping... */ card->membase = ioremap(membase, 1024); diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index 54a5e870a8f5..efbcf8435185 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -19,9 +19,9 @@ #include <linux/bitops.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/irqchip.h> -#include <linux/irqchip/chained_irq.h> #include <linux/of.h> #include <linux/of_platform.h> #include <linux/mfd/syscon.h> @@ -39,6 +39,7 @@ struct keystone_irq_device { struct irq_domain *irqd; struct regmap *devctrl_regs; u32 devctrl_offset; + raw_spinlock_t wa_lock; }; static inline u32 keystone_irq_readl(struct keystone_irq_device *kirq) @@ -83,17 +84,15 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(struct irq_desc *desc) +static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq) { - unsigned int irq = irq_desc_get_irq(desc); - struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); + struct keystone_irq_device *kirq = keystone_irq; + unsigned long wa_lock_flags; unsigned long pending; int src, virq; dev_dbg(kirq->dev, "start irq %d\n", irq); - chained_irq_enter(irq_desc_get_chip(desc), desc); - pending = keystone_irq_readl(kirq); keystone_irq_writel(kirq, pending); @@ -111,13 +110,15 @@ static void keystone_irq_handler(struct irq_desc *desc) if (!virq) dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n", src, virq); + raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags); generic_handle_irq(virq); + raw_spin_unlock_irqrestore(&kirq->wa_lock, + wa_lock_flags); } } - chained_irq_exit(irq_desc_get_chip(desc), desc); - dev_dbg(kirq->dev, "end irq %d\n", irq); + return IRQ_HANDLED; } static int keystone_irq_map(struct irq_domain *h, unsigned int virq, @@ -182,9 +183,16 @@ static int keystone_irq_probe(struct platform_device *pdev) return -ENODEV; } + raw_spin_lock_init(&kirq->wa_lock); + platform_set_drvdata(pdev, kirq); - irq_set_chained_handler_and_data(kirq->irq, keystone_irq_handler, kirq); + ret = request_irq(kirq->irq, keystone_irq_handler, + 0, dev_name(dev), kirq); + if (ret) { + irq_domain_remove(kirq->irqd); + return ret; + } /* clear all source bits */ keystone_irq_writel(kirq, ~0x0); @@ -199,6 +207,8 @@ static int keystone_irq_remove(struct platform_device *pdev) struct keystone_irq_device *kirq = platform_get_drvdata(pdev); int hwirq; + free_irq(kirq->irq, kirq); + for (hwirq = 0; hwirq < KEYSTONE_N_IRQ; hwirq++) irq_dispose_mapping(irq_find_mapping(kirq->irqd, hwirq)); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 17304705f2cf..05fa9f7af53c 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -131,12 +131,16 @@ static struct irq_chip mxs_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = icoll_mask_irq, .irq_unmask = icoll_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; static struct irq_chip asm9260_icoll_chip = { .irq_ack = icoll_ack_irq, .irq_mask = asm9260_mask_irq, .irq_unmask = asm9260_unmask_irq, + .flags = IRQCHIP_MASK_ON_SUSPEND | + IRQCHIP_SKIP_SET_WAKE, }; asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs) diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index 87a6b65ed3af..ccda41c2c9e4 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -612,8 +612,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, } memset(msg->msg + msg->len, 0, sizeof(msg->msg) - msg->len); if (msg->len == 1) { - if (cec_msg_initiator(msg) != 0xf || - cec_msg_destination(msg) == 0xf) { + if (cec_msg_destination(msg) == 0xf) { dprintk(1, "cec_transmit_msg: invalid poll message\n"); return -EINVAL; } @@ -638,7 +637,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg, dprintk(1, "cec_transmit_msg: destination is the adapter itself\n"); return -EINVAL; } - if (cec_msg_initiator(msg) != 0xf && + if (msg->len > 1 && adap->is_configured && !cec_has_log_addr(adap, cec_msg_initiator(msg))) { dprintk(1, "cec_transmit_msg: initiator has unknown logical address %d\n", cec_msg_initiator(msg)); @@ -1072,7 +1071,7 @@ static int cec_config_log_addr(struct cec_adapter *adap, /* Send poll message */ msg.len = 1; - msg.msg[0] = 0xf0 | log_addr; + msg.msg[0] = (log_addr << 4) | log_addr; err = cec_transmit_msg_fh(adap, &msg, NULL, true); /* diff --git a/drivers/media/usb/siano/smsusb.c b/drivers/media/usb/siano/smsusb.c index a4dcaec31d02..8c1f926567ec 100644 --- a/drivers/media/usb/siano/smsusb.c +++ b/drivers/media/usb/siano/smsusb.c @@ -218,22 +218,30 @@ static int smsusb_start_streaming(struct smsusb_device_t *dev) static int smsusb_sendrequest(void *context, void *buffer, size_t size) { struct smsusb_device_t *dev = (struct smsusb_device_t *) context; - struct sms_msg_hdr *phdr = (struct sms_msg_hdr *) buffer; - int dummy; + struct sms_msg_hdr *phdr; + int dummy, ret; if (dev->state != SMSUSB_ACTIVE) { pr_debug("Device not active yet\n"); return -ENOENT; } + phdr = kmalloc(size, GFP_KERNEL); + if (!phdr) + return -ENOMEM; + memcpy(phdr, buffer, size); + pr_debug("sending %s(%d) size: %d\n", smscore_translate_msg(phdr->msg_type), phdr->msg_type, phdr->msg_length); smsendian_handle_tx_message((struct sms_msg_data *) phdr); - smsendian_handle_message_header((struct sms_msg_hdr *)buffer); - return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), - buffer, size, &dummy, 1000); + smsendian_handle_message_header((struct sms_msg_hdr *)phdr); + ret = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 2), + phdr, size, &dummy, 1000); + + kfree(phdr); + return ret; } static char *smsusb1_fw_lkup[] = { diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 7b4e40b286e4..03dc886ed3d6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3111,6 +3111,7 @@ static const struct mv88e6xxx_ops mv88e6085_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, @@ -3159,6 +3160,7 @@ static const struct mv88e6xxx_ops mv88e6097_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3179,6 +3181,7 @@ static const struct mv88e6xxx_ops mv88e6123_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3205,6 +3208,7 @@ static const struct mv88e6xxx_ops mv88e6131_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, @@ -3232,6 +3236,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3250,6 +3255,7 @@ static const struct mv88e6xxx_ops mv88e6165_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3276,6 +3282,7 @@ static const struct mv88e6xxx_ops mv88e6171_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3304,6 +3311,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3330,6 +3338,7 @@ static const struct mv88e6xxx_ops mv88e6175_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3358,6 +3367,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3380,6 +3390,7 @@ static const struct mv88e6xxx_ops mv88e6185_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .ppu_enable = mv88e6185_g1_ppu_enable, .ppu_disable = mv88e6185_g1_ppu_disable, @@ -3409,6 +3420,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3436,6 +3448,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3463,6 +3476,7 @@ static const struct mv88e6xxx_ops mv88e6191_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3491,6 +3505,7 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3519,6 +3534,7 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3598,6 +3614,7 @@ static const struct mv88e6xxx_ops mv88e6350_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3624,6 +3641,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3652,6 +3670,7 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .stats_get_stats = mv88e6095_stats_get_stats, .g1_set_cpu_port = mv88e6095_g1_set_cpu_port, .g1_set_egress_port = mv88e6095_g1_set_egress_port, + .watchdog_ops = &mv88e6097_watchdog_ops, .mgmt_rsvd2cpu = mv88e6095_g2_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3680,6 +3699,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3708,6 +3728,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3738,6 +3759,7 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3767,6 +3789,7 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; @@ -3794,6 +3817,7 @@ static const struct mv88e6xxx_ops mv88e6391_ops = { .stats_get_stats = mv88e6390_stats_get_stats, .g1_set_cpu_port = mv88e6390_g1_set_cpu_port, .g1_set_egress_port = mv88e6390_g1_set_egress_port, + .watchdog_ops = &mv88e6390_watchdog_ops, .mgmt_rsvd2cpu = mv88e6390_g1_mgmt_rsvd2cpu, .reset = mv88e6352_g1_reset, }; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 50e4e0be4227..8f15bc7b1f5f 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -649,6 +649,139 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, return mv88e6xxx_g2_smi_phy_write_c22(chip, addr, reg, val, external); } +static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq) +{ + u16 reg; + + mv88e6xxx_g2_read(chip, GLOBAL2_WDOG_CONTROL, ®); + + dev_info(chip->dev, "Watchdog event: 0x%04x", reg); + + return IRQ_HANDLED; +} + +static void mv88e6097_watchdog_free(struct mv88e6xxx_chip *chip) +{ + u16 reg; + + mv88e6xxx_g2_read(chip, GLOBAL2_WDOG_CONTROL, ®); + + reg &= ~(GLOBAL2_WDOG_CONTROL_EGRESS_ENABLE | + GLOBAL2_WDOG_CONTROL_QC_ENABLE); + + mv88e6xxx_g2_write(chip, GLOBAL2_WDOG_CONTROL, reg); +} + +static int mv88e6097_watchdog_setup(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_g2_write(chip, GLOBAL2_WDOG_CONTROL, + GLOBAL2_WDOG_CONTROL_EGRESS_ENABLE | + GLOBAL2_WDOG_CONTROL_QC_ENABLE | + GLOBAL2_WDOG_CONTROL_SWRESET); +} + +const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = { + .irq_action = mv88e6097_watchdog_action, + .irq_setup = mv88e6097_watchdog_setup, + .irq_free = mv88e6097_watchdog_free, +}; + +static int mv88e6390_watchdog_setup(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_g2_update(chip, GLOBAL2_WDOG_CONTROL, + GLOBAL2_WDOG_INT_ENABLE | + GLOBAL2_WDOG_CUT_THROUGH | + GLOBAL2_WDOG_QUEUE_CONTROLLER | + GLOBAL2_WDOG_EGRESS | + GLOBAL2_WDOG_FORCE_IRQ); +} + +static int mv88e6390_watchdog_action(struct mv88e6xxx_chip *chip, int irq) +{ + int err; + u16 reg; + + mv88e6xxx_g2_write(chip, GLOBAL2_WDOG_CONTROL, GLOBAL2_WDOG_EVENT); + err = mv88e6xxx_g2_read(chip, GLOBAL2_WDOG_CONTROL, ®); + + dev_info(chip->dev, "Watchdog event: 0x%04x", + reg & GLOBAL2_WDOG_DATA_MASK); + + mv88e6xxx_g2_write(chip, GLOBAL2_WDOG_CONTROL, GLOBAL2_WDOG_HISTORY); + err = mv88e6xxx_g2_read(chip, GLOBAL2_WDOG_CONTROL, ®); + + dev_info(chip->dev, "Watchdog history: 0x%04x", + reg & GLOBAL2_WDOG_DATA_MASK); + + /* Trigger a software reset to try to recover the switch */ + if (chip->info->ops->reset) + chip->info->ops->reset(chip); + + mv88e6390_watchdog_setup(chip); + + return IRQ_HANDLED; +} + +static void mv88e6390_watchdog_free(struct mv88e6xxx_chip *chip) +{ + mv88e6xxx_g2_update(chip, GLOBAL2_WDOG_CONTROL, + GLOBAL2_WDOG_INT_ENABLE); +} + +const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = { + .irq_action = mv88e6390_watchdog_action, + .irq_setup = mv88e6390_watchdog_setup, + .irq_free = mv88e6390_watchdog_free, +}; + +static irqreturn_t mv88e6xxx_g2_watchdog_thread_fn(int irq, void *dev_id) +{ + struct mv88e6xxx_chip *chip = dev_id; + irqreturn_t ret = IRQ_NONE; + + mutex_lock(&chip->reg_lock); + if (chip->info->ops->watchdog_ops->irq_action) + ret = chip->info->ops->watchdog_ops->irq_action(chip, irq); + mutex_unlock(&chip->reg_lock); + + return ret; +} + +static void mv88e6xxx_g2_watchdog_free(struct mv88e6xxx_chip *chip) +{ + mutex_lock(&chip->reg_lock); + if (chip->info->ops->watchdog_ops->irq_free) + chip->info->ops->watchdog_ops->irq_free(chip); + mutex_unlock(&chip->reg_lock); + + free_irq(chip->watchdog_irq, chip); + irq_dispose_mapping(chip->watchdog_irq); +} + +static int mv88e6xxx_g2_watchdog_setup(struct mv88e6xxx_chip *chip) +{ + int err; + + chip->watchdog_irq = irq_find_mapping(chip->g2_irq.domain, + GLOBAL2_INT_SOURCE_WATCHDOG); + if (chip->watchdog_irq < 0) + return chip->watchdog_irq; + + err = request_threaded_irq(chip->watchdog_irq, NULL, + mv88e6xxx_g2_watchdog_thread_fn, + IRQF_ONESHOT | IRQF_TRIGGER_FALLING, + "mv88e6xxx-watchdog", chip); + if (err) + return err; + + mutex_lock(&chip->reg_lock); + if (chip->info->ops->watchdog_ops->irq_setup) + err = chip->info->ops->watchdog_ops->irq_setup(chip); + mutex_unlock(&chip->reg_lock); + + return err; +} + static void mv88e6xxx_g2_irq_mask(struct irq_data *d) { struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); @@ -737,6 +870,8 @@ void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) { int irq, virq; + mv88e6xxx_g2_watchdog_free(chip); + free_irq(chip->device_irq, chip); irq_dispose_mapping(chip->device_irq); @@ -779,7 +914,7 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) if (err) goto out; - return 0; + return mv88e6xxx_g2_watchdog_setup(chip); out: for (irq = 0; irq < 16; irq++) { diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 00e635279ba1..a8b2f9486a4a 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -46,6 +46,9 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip); void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip); int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); +extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; +extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; + #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) @@ -125,6 +128,9 @@ static inline int mv88e6095_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) return -EOPNOTSUPP; } +static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {}; +static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {}; + #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ #endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index ac54f40813f7..6033f2f6260a 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -339,6 +339,7 @@ #define GLOBAL_STATS_COUNTER_01 0x1f #define GLOBAL2_INT_SOURCE 0x00 +#define GLOBAL2_INT_SOURCE_WATCHDOG 15 #define GLOBAL2_INT_MASK 0x01 #define GLOBAL2_MGMT_EN_2X 0x02 #define GLOBAL2_MGMT_EN_0X 0x03 @@ -415,6 +416,25 @@ #define GLOBAL2_SCRATCH_REGISTER_SHIFT 8 #define GLOBAL2_SCRATCH_VALUE_MASK 0xff #define GLOBAL2_WDOG_CONTROL 0x1b +#define GLOBAL2_WDOG_CONTROL_EGRESS_EVENT BIT(7) +#define GLOBAL2_WDOG_CONTROL_RMU_TIMEOUT BIT(6) +#define GLOBAL2_WDOG_CONTROL_QC_ENABLE BIT(5) +#define GLOBAL2_WDOG_CONTROL_EGRESS_HISTORY BIT(4) +#define GLOBAL2_WDOG_CONTROL_EGRESS_ENABLE BIT(3) +#define GLOBAL2_WDOG_CONTROL_FORCE_IRQ BIT(2) +#define GLOBAL2_WDOG_CONTROL_HISTORY BIT(1) +#define GLOBAL2_WDOG_CONTROL_SWRESET BIT(0) +#define GLOBAL2_WDOG_UPDATE BIT(15) +#define GLOBAL2_WDOG_INT_SOURCE (0x00 << 8) +#define GLOBAL2_WDOG_INT_STATUS (0x10 << 8) +#define GLOBAL2_WDOG_INT_ENABLE (0x11 << 8) +#define GLOBAL2_WDOG_EVENT (0x12 << 8) +#define GLOBAL2_WDOG_HISTORY (0x13 << 8) +#define GLOBAL2_WDOG_DATA_MASK 0xff +#define GLOBAL2_WDOG_CUT_THROUGH BIT(3) +#define GLOBAL2_WDOG_QUEUE_CONTROLLER BIT(2) +#define GLOBAL2_WDOG_EGRESS BIT(1) +#define GLOBAL2_WDOG_FORCE_IRQ BIT(0) #define GLOBAL2_QOS_WEIGHT 0x1c #define GLOBAL2_MISC 0x1d @@ -666,6 +686,7 @@ enum mv88e6xxx_cap { #define MV88E6XXX_FLAGS_FAMILY_6390 \ (MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_INT | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ @@ -706,6 +727,7 @@ struct mv88e6xxx_vtu_entry { }; struct mv88e6xxx_bus_ops; +struct mv88e6xxx_irq_ops; struct mv88e6xxx_irq { u16 masked; @@ -766,6 +788,7 @@ struct mv88e6xxx_chip { struct mv88e6xxx_irq g2_irq; int irq; int device_irq; + int watchdog_irq; }; struct mv88e6xxx_bus_ops { @@ -879,11 +902,21 @@ struct mv88e6xxx_ops { uint64_t *data); int (*g1_set_cpu_port)(struct mv88e6xxx_chip *chip, int port); int (*g1_set_egress_port)(struct mv88e6xxx_chip *chip, int port); + const struct mv88e6xxx_irq_ops *watchdog_ops; /* Can be either in g1 or g2, so don't use a prefix */ int (*mgmt_rsvd2cpu)(struct mv88e6xxx_chip *chip); }; +struct mv88e6xxx_irq_ops { + /* Action to be performed when the interrupt happens */ + int (*irq_action)(struct mv88e6xxx_chip *chip, int irq); + /* Setup the hardware to generate the interrupt */ + int (*irq_setup)(struct mv88e6xxx_chip *chip); + /* Reset the hardware to stop generating the interrupt */ + void (*irq_free)(struct mv88e6xxx_chip *chip); +}; + #define STATS_TYPE_PORT BIT(0) #define STATS_TYPE_BANK0 BIT(1) #define STATS_TYPE_BANK1 BIT(2) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index a8a22c0d688a..86369d7c9a0f 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -291,7 +291,10 @@ struct pcnet32_private { int options; unsigned int shared_irq:1, /* shared irq possible */ dxsuflo:1, /* disable transmit stop on uflo */ - mii:1; /* mii port available */ + mii:1, /* mii port available */ + autoneg:1, /* autoneg enabled */ + port_tp:1, /* port set to TP */ + fdx:1; /* full duplex enabled */ struct net_device *next; struct mii_if_info mii_if; struct timer_list watchdog_timer; @@ -677,6 +680,52 @@ static void pcnet32_poll_controller(struct net_device *dev) } #endif +/* + * lp->lock must be held. + */ +static int pcnet32_suspend(struct net_device *dev, unsigned long *flags, + int can_sleep) +{ + int csr5; + struct pcnet32_private *lp = netdev_priv(dev); + const struct pcnet32_access *a = lp->a; + ulong ioaddr = dev->base_addr; + int ticks; + + /* really old chips have to be stopped. */ + if (lp->chip_version < PCNET32_79C970A) + return 0; + + /* set SUSPEND (SPND) - CSR5 bit 0 */ + csr5 = a->read_csr(ioaddr, CSR5); + a->write_csr(ioaddr, CSR5, csr5 | CSR5_SUSPEND); + + /* poll waiting for bit to be set */ + ticks = 0; + while (!(a->read_csr(ioaddr, CSR5) & CSR5_SUSPEND)) { + spin_unlock_irqrestore(&lp->lock, *flags); + if (can_sleep) + msleep(1); + else + mdelay(1); + spin_lock_irqsave(&lp->lock, *flags); + ticks++; + if (ticks > 200) { + netif_printk(lp, hw, KERN_DEBUG, dev, + "Error getting into suspend!\n"); + return 0; + } + } + return 1; +} + +static void pcnet32_clr_suspend(struct pcnet32_private *lp, ulong ioaddr) +{ + int csr5 = lp->a->read_csr(ioaddr, CSR5); + /* clear SUSPEND (SPND) - CSR5 bit 0 */ + lp->a->write_csr(ioaddr, CSR5, csr5 & ~CSR5_SUSPEND); +} + static int pcnet32_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -684,12 +733,29 @@ static int pcnet32_get_link_ksettings(struct net_device *dev, unsigned long flags; int r = -EOPNOTSUPP; + spin_lock_irqsave(&lp->lock, flags); if (lp->mii) { - spin_lock_irqsave(&lp->lock, flags); mii_ethtool_get_link_ksettings(&lp->mii_if, cmd); - spin_unlock_irqrestore(&lp->lock, flags); + r = 0; + } else if (lp->chip_version == PCNET32_79C970A) { + if (lp->autoneg) { + cmd->base.autoneg = AUTONEG_ENABLE; + if (lp->a->read_bcr(dev->base_addr, 4) == 0xc0) + cmd->base.port = PORT_AUI; + else + cmd->base.port = PORT_TP; + } else { + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = lp->port_tp ? PORT_TP : PORT_AUI; + } + cmd->base.duplex = lp->fdx ? DUPLEX_FULL : DUPLEX_HALF; + cmd->base.speed = SPEED_10; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + SUPPORTED_TP | SUPPORTED_AUI); r = 0; } + spin_unlock_irqrestore(&lp->lock, flags); return r; } @@ -697,14 +763,46 @@ static int pcnet32_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct pcnet32_private *lp = netdev_priv(dev); + ulong ioaddr = dev->base_addr; unsigned long flags; int r = -EOPNOTSUPP; + int suspended, bcr2, bcr9, csr15; + spin_lock_irqsave(&lp->lock, flags); if (lp->mii) { - spin_lock_irqsave(&lp->lock, flags); r = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd); - spin_unlock_irqrestore(&lp->lock, flags); + } else if (lp->chip_version == PCNET32_79C970A) { + suspended = pcnet32_suspend(dev, &flags, 0); + if (!suspended) + lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); + + lp->autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + bcr2 = lp->a->read_bcr(ioaddr, 2); + if (cmd->base.autoneg == AUTONEG_ENABLE) { + lp->a->write_bcr(ioaddr, 2, bcr2 | 0x0002); + } else { + lp->a->write_bcr(ioaddr, 2, bcr2 & ~0x0002); + + lp->port_tp = cmd->base.port == PORT_TP; + csr15 = lp->a->read_csr(ioaddr, CSR15) & ~0x0180; + if (cmd->base.port == PORT_TP) + csr15 |= 0x0080; + lp->a->write_csr(ioaddr, CSR15, csr15); + lp->init_block->mode = cpu_to_le16(csr15); + + lp->fdx = cmd->base.duplex == DUPLEX_FULL; + bcr9 = lp->a->read_bcr(ioaddr, 9) & ~0x0003; + if (cmd->base.duplex == DUPLEX_FULL) + bcr9 |= 0x0003; + lp->a->write_bcr(ioaddr, 9, bcr9); + } + if (suspended) + pcnet32_clr_suspend(lp, ioaddr); + else if (netif_running(dev)) + pcnet32_restart(dev, CSR0_NORMAL); + r = 0; } + spin_unlock_irqrestore(&lp->lock, flags); return r; } @@ -732,7 +830,14 @@ static u32 pcnet32_get_link(struct net_device *dev) spin_lock_irqsave(&lp->lock, flags); if (lp->mii) { r = mii_link_ok(&lp->mii_if); - } else if (lp->chip_version >= PCNET32_79C970A) { + } else if (lp->chip_version == PCNET32_79C970A) { + ulong ioaddr = dev->base_addr; /* card base I/O address */ + /* only read link if port is set to TP */ + if (!lp->autoneg && lp->port_tp) + r = (lp->a->read_bcr(ioaddr, 4) != 0xc0); + else /* link always up for AUI port or port auto select */ + r = 1; + } else if (lp->chip_version > PCNET32_79C970A) { ulong ioaddr = dev->base_addr; /* card base I/O address */ r = (lp->a->read_bcr(ioaddr, 4) != 0xc0); } else { /* can not detect link on really old chips */ @@ -1070,45 +1175,6 @@ static int pcnet32_set_phys_id(struct net_device *dev, } /* - * lp->lock must be held. - */ -static int pcnet32_suspend(struct net_device *dev, unsigned long *flags, - int can_sleep) -{ - int csr5; - struct pcnet32_private *lp = netdev_priv(dev); - const struct pcnet32_access *a = lp->a; - ulong ioaddr = dev->base_addr; - int ticks; - - /* really old chips have to be stopped. */ - if (lp->chip_version < PCNET32_79C970A) - return 0; - - /* set SUSPEND (SPND) - CSR5 bit 0 */ - csr5 = a->read_csr(ioaddr, CSR5); - a->write_csr(ioaddr, CSR5, csr5 | CSR5_SUSPEND); - - /* poll waiting for bit to be set */ - ticks = 0; - while (!(a->read_csr(ioaddr, CSR5) & CSR5_SUSPEND)) { - spin_unlock_irqrestore(&lp->lock, *flags); - if (can_sleep) - msleep(1); - else - mdelay(1); - spin_lock_irqsave(&lp->lock, *flags); - ticks++; - if (ticks > 200) { - netif_printk(lp, hw, KERN_DEBUG, dev, - "Error getting into suspend!\n"); - return 0; - } - } - return 1; -} - -/* * process one receive descriptor entry */ @@ -1425,13 +1491,8 @@ static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, } } - if (!(csr0 & CSR0_STOP)) { /* If not stopped */ - int csr5; - - /* clear SUSPEND (SPND) - CSR5 bit 0 */ - csr5 = a->read_csr(ioaddr, CSR5); - a->write_csr(ioaddr, CSR5, csr5 & (~CSR5_SUSPEND)); - } + if (!(csr0 & CSR0_STOP)) /* If not stopped */ + pcnet32_clr_suspend(lp, ioaddr); spin_unlock_irqrestore(&lp->lock, flags); } @@ -1812,6 +1873,9 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options = PCNET32_PORT_ASEL; else lp->options = options_mapping[options[cards_found]]; + /* force default port to TP on 79C970A so link detection can work */ + if (lp->chip_version == PCNET32_79C970A) + lp->options = PCNET32_PORT_10BT; lp->mii_if.dev = dev; lp->mii_if.mdio_read = mdio_read; lp->mii_if.mdio_write = mdio_write; @@ -2063,6 +2127,10 @@ static int pcnet32_open(struct net_device *dev) (u32) (lp->rx_ring_dma_addr), (u32) (lp->init_dma_addr)); + lp->autoneg = !!(lp->options & PCNET32_PORT_ASEL); + lp->port_tp = !!(lp->options & PCNET32_PORT_10BT); + lp->fdx = !!(lp->options & PCNET32_PORT_FD); + /* set/reset autoselect bit */ val = lp->a->read_bcr(ioaddr, 2) & ~2; if (lp->options & PCNET32_PORT_ASEL) @@ -2675,10 +2743,7 @@ static void pcnet32_set_multicast_list(struct net_device *dev) } if (suspended) { - int csr5; - /* clear SUSPEND (SPND) - CSR5 bit 0 */ - csr5 = lp->a->read_csr(ioaddr, CSR5); - lp->a->write_csr(ioaddr, CSR5, csr5 & (~CSR5_SUSPEND)); + pcnet32_clr_suspend(lp, ioaddr); } else { lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); pcnet32_restart(dev, CSR0_NORMAL); @@ -2789,6 +2854,13 @@ static void pcnet32_check_media(struct net_device *dev, int verbose) if (lp->mii) { curr_link = mii_link_ok(&lp->mii_if); + } else if (lp->chip_version == PCNET32_79C970A) { + ulong ioaddr = dev->base_addr; /* card base I/O address */ + /* only read link if port is set to TP */ + if (!lp->autoneg && lp->port_tp) + curr_link = (lp->a->read_bcr(ioaddr, 4) != 0xc0); + else /* link always up for AUI port or port auto select */ + curr_link = 1; } else { ulong ioaddr = dev->base_addr; /* card base I/O address */ curr_link = (lp->a->read_bcr(ioaddr, 4) != 0xc0); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index bed25abd2889..aa22a7ce710b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -737,48 +737,70 @@ err_exit:; void aq_nic_get_link_ksettings(struct aq_nic_s *self, struct ethtool_link_ksettings *cmd) { - u32 supported, advertising; - cmd->base.port = PORT_TP; /* This driver supports only 10G capable adapters, so DUPLEX_FULL */ cmd->base.duplex = DUPLEX_FULL; cmd->base.autoneg = self->aq_nic_cfg.is_autoneg; - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); - - supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_10G) ? - ADVERTISED_10000baseT_Full : 0U; - supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_1G) ? - ADVERTISED_1000baseT_Full : 0U; - supported |= (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_100M) ? - ADVERTISED_100baseT_Full : 0U; - supported |= self->aq_hw_caps.flow_control ? SUPPORTED_Pause : 0; - supported |= SUPPORTED_Autoneg; - supported |= SUPPORTED_TP; - - advertising = (self->aq_nic_cfg.is_autoneg) ? - ADVERTISED_Autoneg : 0U; - advertising |= - (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_10G) ? - ADVERTISED_10000baseT_Full : 0U; - advertising |= - (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_1G) ? - ADVERTISED_1000baseT_Full : 0U; - - advertising |= - (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_100M) ? - ADVERTISED_100baseT_Full : 0U; - advertising |= (self->aq_nic_cfg.flow_control) ? - ADVERTISED_Pause : 0U; - advertising |= ADVERTISED_TP; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + ethtool_link_ksettings_zero_link_mode(cmd, supported); + + if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_10G) + ethtool_link_ksettings_add_link_mode(cmd, supported, + 10000baseT_Full); + + if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_5G) + ethtool_link_ksettings_add_link_mode(cmd, supported, + 5000baseT_Full); + + if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_2GS) + ethtool_link_ksettings_add_link_mode(cmd, supported, + 2500baseT_Full); + + if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_1G) + ethtool_link_ksettings_add_link_mode(cmd, supported, + 1000baseT_Full); + + if (self->aq_hw_caps.link_speed_msk & AQ_NIC_RATE_100M) + ethtool_link_ksettings_add_link_mode(cmd, supported, + 100baseT_Full); + + if (self->aq_hw_caps.flow_control) + ethtool_link_ksettings_add_link_mode(cmd, supported, + Pause); + + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + + if (self->aq_nic_cfg.is_autoneg) + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + + if (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_10G) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 10000baseT_Full); + + if (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_5G) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 5000baseT_Full); + + if (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_2GS) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 2500baseT_Full); + + if (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_1G) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 1000baseT_Full); + + if (self->aq_nic_cfg.link_speed_msk & AQ_NIC_RATE_100M) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + 100baseT_Full); + + if (self->aq_nic_cfg.flow_control) + ethtool_link_ksettings_add_link_mode(cmd, advertising, + Pause); + + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); } int aq_nic_set_link_ksettings(struct aq_nic_s *self, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index da4bc09dac51..581de71a958a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -22,13 +22,11 @@ struct aq_pci_func_s { void *aq_vec[AQ_CFG_PCI_FUNC_MSIX_IRQS]; resource_size_t mmio_pa; unsigned int msix_entry_mask; - unsigned int irq_type; unsigned int ports; bool is_pci_enabled; bool is_regions; bool is_pci_using_dac; struct aq_hw_caps_s aq_hw_caps; - struct msix_entry msix_entry[AQ_CFG_PCI_FUNC_MSIX_IRQS]; }; struct aq_pci_func_s *aq_pci_func_alloc(struct aq_hw_ops *aq_hw_ops, @@ -87,7 +85,6 @@ int aq_pci_func_init(struct aq_pci_func_s *self) int err = 0; unsigned int bar = 0U; unsigned int port = 0U; - unsigned int i = 0U; err = pci_enable_device(self->pdev); if (err < 0) @@ -145,27 +142,16 @@ int aq_pci_func_init(struct aq_pci_func_s *self) } } - for (i = 0; i < self->aq_hw_caps.msix_irqs; i++) - self->msix_entry[i].entry = i; - /*enable interrupts */ -#if AQ_CFG_FORCE_LEGACY_INT - self->irq_type = AQ_HW_IRQ_LEGACY; -#else - err = pci_enable_msix(self->pdev, self->msix_entry, - self->aq_hw_caps.msix_irqs); +#if !AQ_CFG_FORCE_LEGACY_INT + err = pci_alloc_irq_vectors(self->pdev, self->aq_hw_caps.msix_irqs, + self->aq_hw_caps.msix_irqs, PCI_IRQ_MSIX); - if (err >= 0) { - self->irq_type = AQ_HW_IRQ_MSIX; - } else { - err = pci_enable_msi(self->pdev); - - if (err >= 0) { - self->irq_type = AQ_HW_IRQ_MSI; - } else { - self->irq_type = AQ_HW_IRQ_LEGACY; - err = 0; - } + if (err < 0) { + err = pci_alloc_irq_vectors(self->pdev, 1, 1, + PCI_IRQ_MSI | PCI_IRQ_LEGACY); + if (err < 0) + goto err_exit; } #endif @@ -196,34 +182,22 @@ err_exit: int aq_pci_func_alloc_irq(struct aq_pci_func_s *self, unsigned int i, char *name, void *aq_vec, cpumask_t *affinity_mask) { + struct pci_dev *pdev = self->pdev; int err = 0; - switch (self->irq_type) { - case AQ_HW_IRQ_MSIX: - err = request_irq(self->msix_entry[i].vector, aq_vec_isr, 0, + if (pdev->msix_enabled || pdev->msi_enabled) + err = request_irq(pci_irq_vector(pdev, i), aq_vec_isr, 0, name, aq_vec); - break; - - case AQ_HW_IRQ_MSI: - err = request_irq(self->pdev->irq, aq_vec_isr, 0, name, aq_vec); - break; - - case AQ_HW_IRQ_LEGACY: - err = request_irq(self->pdev->irq, aq_vec_isr_legacy, + else + err = request_irq(pci_irq_vector(pdev, i), aq_vec_isr_legacy, IRQF_SHARED, name, aq_vec); - break; - - default: - err = -EFAULT; - break; - } if (err >= 0) { self->msix_entry_mask |= (1 << i); self->aq_vec[i] = aq_vec; - if (self->irq_type == AQ_HW_IRQ_MSIX) - irq_set_affinity_hint(self->msix_entry[i].vector, + if (pdev->msix_enabled) + irq_set_affinity_hint(pci_irq_vector(pdev, i), affinity_mask); } @@ -232,30 +206,16 @@ int aq_pci_func_alloc_irq(struct aq_pci_func_s *self, unsigned int i, void aq_pci_func_free_irqs(struct aq_pci_func_s *self) { + struct pci_dev *pdev = self->pdev; unsigned int i = 0U; for (i = 32U; i--;) { if (!((1U << i) & self->msix_entry_mask)) continue; - switch (self->irq_type) { - case AQ_HW_IRQ_MSIX: - irq_set_affinity_hint(self->msix_entry[i].vector, NULL); - free_irq(self->msix_entry[i].vector, self->aq_vec[i]); - break; - - case AQ_HW_IRQ_MSI: - free_irq(self->pdev->irq, self->aq_vec[i]); - break; - - case AQ_HW_IRQ_LEGACY: - free_irq(self->pdev->irq, self->aq_vec[i]); - break; - - default: - break; - } - + free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); + if (pdev->msix_enabled) + irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL); self->msix_entry_mask &= ~(1U << i); } } @@ -267,7 +227,11 @@ void __iomem *aq_pci_func_get_mmio(struct aq_pci_func_s *self) unsigned int aq_pci_func_get_irq_type(struct aq_pci_func_s *self) { - return self->irq_type; + if (self->pdev->msix_enabled) + return AQ_HW_IRQ_MSIX; + if (self->pdev->msi_enabled) + return AQ_HW_IRQ_MSIX; + return AQ_HW_IRQ_LEGACY; } void aq_pci_func_deinit(struct aq_pci_func_s *self) @@ -276,22 +240,7 @@ void aq_pci_func_deinit(struct aq_pci_func_s *self) goto err_exit; aq_pci_func_free_irqs(self); - - switch (self->irq_type) { - case AQ_HW_IRQ_MSI: - pci_disable_msi(self->pdev); - break; - - case AQ_HW_IRQ_MSIX: - pci_disable_msix(self->pdev); - break; - - case AQ_HW_IRQ_LEGACY: - break; - - default: - break; - } + pci_free_irq_vectors(self->pdev); if (self->is_regions) pci_release_regions(self->pdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 70590144be03..afb0967d2ce6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -2400,7 +2400,7 @@ static void cxgb_get_stats(struct net_device *dev, ns->rx_over_errors = 0; ns->rx_crc_errors = stats.rx_fcs_err; ns->rx_frame_errors = stats.rx_symbol_err; - ns->rx_fifo_errors = stats.rx_ovflow0 + stats.rx_ovflow1 + + ns->rx_dropped = stats.rx_ovflow0 + stats.rx_ovflow1 + stats.rx_ovflow2 + stats.rx_ovflow3 + stats.rx_trunc0 + stats.rx_trunc1 + stats.rx_trunc2 + stats.rx_trunc3; @@ -4907,8 +4907,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "continuing\n"); adapter->params.offload = 0; } else { - adapter->tc_u32 = cxgb4_init_tc_u32(adapter, - CXGB4_MAX_LINK_HANDLE); + adapter->tc_u32 = cxgb4_init_tc_u32(adapter); if (!adapter->tc_u32) dev_warn(&pdev->dev, "could not offload tc u32, continuing\n"); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c index 52af62e0ecb6..a1b19422b339 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -437,28 +437,26 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap) t4_free_mem(adap->tc_u32); } -struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, - unsigned int size) +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap) { + unsigned int max_tids = adap->tids.nftids; struct cxgb4_tc_u32_table *t; unsigned int i; - if (!size) + if (!max_tids) return NULL; t = t4_alloc_mem(sizeof(*t) + - (size * sizeof(struct cxgb4_link))); + (max_tids * sizeof(struct cxgb4_link))); if (!t) return NULL; - t->size = size; + t->size = max_tids; for (i = 0; i < t->size; i++) { struct cxgb4_link *link = &t->table[i]; unsigned int bmap_size; - unsigned int max_tids; - max_tids = adap->tids.nftids; bmap_size = BITS_TO_LONGS(max_tids); link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); if (!link->tid_map) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h index 6bdc885eff22..021261a41c13 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -37,8 +37,6 @@ #include <net/pkt_cls.h> -#define CXGB4_MAX_LINK_HANDLE 32 - static inline bool can_tc_u32_offload(struct net_device *dev) { struct adapter *adap = netdev2adap(dev); @@ -52,6 +50,5 @@ int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, struct tc_cls_u32_offload *cls); void cxgb4_cleanup_tc_u32(struct adapter *adapter); -struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, - unsigned int size); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap); #endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 36105b6837cb..d0868c2320da 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -591,7 +591,6 @@ void t4_uld_mem_free(struct adapter *adap) void t4_uld_clean_up(struct adapter *adap) { - struct sge_uld_rxq_info *rxq_info; unsigned int i; if (!adap->uld) @@ -599,7 +598,6 @@ void t4_uld_clean_up(struct adapter *adap) for (i = 0; i < CXGB4_ULD_MAX; i++) { if (!adap->uld[i].handle) continue; - rxq_info = adap->sge.uld_rxq_info[i]; if (adap->flags & FULL_INIT_DONE) quiesce_rx_uld(adap, i); if (adap->flags & USING_MSIX) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index ecf3ccc257bc..a323185507ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -169,6 +169,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x509b), /* Custom T540-CR LOM */ CH_PCI_ID_TABLE_FENTRY(0x509c), /* Custom T520-CR*/ CH_PCI_ID_TABLE_FENTRY(0x509d), /* Custom T540-CR*/ + CH_PCI_ID_TABLE_FENTRY(0x509e), /* Custom T520-CR */ + CH_PCI_ID_TABLE_FENTRY(0x509f), /* Custom T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a0), /* Custom T540-CR */ /* T6 adapters: */ @@ -185,6 +188,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6011), CH_PCI_ID_TABLE_FENTRY(0x6014), CH_PCI_ID_TABLE_FENTRY(0x6015), + CH_PCI_ID_TABLE_FENTRY(0x6080), + CH_PCI_ID_TABLE_FENTRY(0x6081), CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c index f18aba05f1c2..23d82748f52b 100644 --- a/drivers/net/ethernet/ethoc.c +++ b/drivers/net/ethernet/ethoc.c @@ -1031,7 +1031,6 @@ static int ethoc_probe(struct platform_device *pdev) struct ethoc *priv = NULL; int num_bd; int ret = 0; - bool random_mac = false; struct ethoc_platform_data *pdata = dev_get_platdata(&pdev->dev); u32 eth_clkfreq = pdata ? pdata->eth_clkfreq : 0; @@ -1169,16 +1168,11 @@ static int ethoc_probe(struct platform_device *pdev) /* Check the MAC again for validity, if it still isn't choose and * program a random one. */ - if (!is_valid_ether_addr(netdev->dev_addr)) { - eth_random_addr(netdev->dev_addr); - random_mac = true; - } + if (!is_valid_ether_addr(netdev->dev_addr)) + eth_hw_addr_random(netdev); ethoc_do_set_mac_address(netdev); - if (random_mac) - netdev->addr_assign_type = NET_ADDR_RANDOM; - /* Allow the platform setup code to adjust MII management bus clock. */ if (!eth_clkfreq) { struct clk *clk = devm_clk_get(&pdev->dev, NULL); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 2cc552ddd8f0..91a16641e851 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2910,6 +2910,7 @@ static void set_multicast_list(struct net_device *ndev) struct netdev_hw_addr *ha; unsigned int i, bit, data, crc, tmp; unsigned char hash; + unsigned int hash_high = 0, hash_low = 0; if (ndev->flags & IFF_PROMISC) { tmp = readl(fep->hwp + FEC_R_CNTRL); @@ -2932,11 +2933,7 @@ static void set_multicast_list(struct net_device *ndev) return; } - /* Clear filter and add the addresses in hash register - */ - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - + /* Add the addresses in hash register */ netdev_for_each_mc_addr(ha, ndev) { /* calculate crc32 value of mac address */ crc = 0xffffffff; @@ -2954,16 +2951,14 @@ static void set_multicast_list(struct net_device *ndev) */ hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; - if (hash > 31) { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - tmp |= 1 << (hash - 32); - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); - } else { - tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_LOW); - tmp |= 1 << hash; - writel(tmp, fep->hwp + FEC_GRP_HASH_TABLE_LOW); - } + if (hash > 31) + hash_high |= 1 << (hash - 32); + else + hash_low |= 1 << hash; } + + writel(hash_high, fep->hwp + FEC_GRP_HASH_TABLE_HIGH); + writel(hash_low, fep->hwp + FEC_GRP_HASH_TABLE_LOW); } /* Set a MAC change in hardware. */ diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 54e3ce9bd94c..753259091b22 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -964,11 +964,10 @@ static int fs_enet_probe(struct platform_device *ofdev) */ clk = devm_clk_get(&ofdev->dev, "per"); if (!IS_ERR(clk)) { - err = clk_prepare_enable(clk); - if (err) { - ret = err; + ret = clk_prepare_enable(clk); + if (ret) goto out_deregister_fixed_link; - } + fpi->clk_per = clk; } @@ -1045,10 +1044,10 @@ out_cleanup_data: out_free_dev: free_netdev(ndev); out_put: - of_node_put(fpi->phy_node); if (fpi->clk_per) clk_disable_unprepare(fpi->clk_per); out_deregister_fixed_link: + of_node_put(fpi->phy_node); if (of_phy_is_fixed_link(ofdev->dev.of_node)) of_phy_deregister_fixed_link(ofdev->dev.of_node); out_free_fpi: diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 6e50ec82b3d8..0cec06bec63e 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -701,11 +701,6 @@ static void hip04_tx_timeout_task(struct work_struct *work) hip04_mac_open(priv->ndev); } -static struct net_device_stats *hip04_get_stats(struct net_device *ndev) -{ - return &ndev->stats; -} - static int hip04_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { @@ -764,7 +759,6 @@ static const struct ethtool_ops hip04_ethtool_ops = { static const struct net_device_ops hip04_netdev_ops = { .ndo_open = hip04_mac_open, .ndo_stop = hip04_mac_stop, - .ndo_get_stats = hip04_get_stats, .ndo_start_xmit = hip04_mac_start_xmit, .ndo_set_mac_address = hip04_set_mac_address, .ndo_tx_timeout = hip04_timeout, diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c46935d4a3fe..0c94e23985be 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -189,9 +189,10 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, } ltb->map_id = adapter->map_id; adapter->map_id++; + + init_completion(&adapter->fw_done); send_request_map(adapter, ltb->addr, ltb->size, ltb->map_id); - init_completion(&adapter->fw_done); wait_for_completion(&adapter->fw_done); return 0; } @@ -505,7 +506,7 @@ rx_pool_alloc_failed: adapter->rx_pool = NULL; rx_pool_arr_alloc_failed: for (i = 0; i < adapter->req_rx_queues; i++) - napi_enable(&adapter->napi[i]); + napi_disable(&adapter->napi[i]); alloc_napi_failed: return -ENOMEM; } @@ -1126,10 +1127,10 @@ static void ibmvnic_get_ethtool_stats(struct net_device *dev, crq.request_statistics.ioba = cpu_to_be32(adapter->stats_token); crq.request_statistics.len = cpu_to_be32(sizeof(struct ibmvnic_statistics)); - ibmvnic_send_crq(adapter, &crq); /* Wait for data to be written */ init_completion(&adapter->stats_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->stats_done); for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++) @@ -1501,7 +1502,7 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry) adapter->req_rx_queues = adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; - adapter->req_mtu = adapter->max_mtu; + adapter->req_mtu = adapter->netdev->mtu + ETH_HLEN; } total_queues = adapter->req_tx_queues + adapter->req_rx_queues; @@ -2190,12 +2191,12 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, if (!found) { dev_err(dev, "Couldn't find error id %x\n", - crq->request_error_rsp.error_id); + be32_to_cpu(crq->request_error_rsp.error_id)); return; } dev_err(dev, "Detailed info for error id %x:", - crq->request_error_rsp.error_id); + be32_to_cpu(crq->request_error_rsp.error_id)); for (i = 0; i < error_buff->len; i++) { pr_cont("%02x", (int)error_buff->buff[i]); @@ -2274,8 +2275,8 @@ static void handle_error_indication(union ibmvnic_crq *crq, dev_err(dev, "Firmware reports %serror id %x, cause %d\n", crq->error_indication. flags & IBMVNIC_FATAL_ERROR ? "FATAL " : "", - crq->error_indication.error_id, - crq->error_indication.error_cause); + be32_to_cpu(crq->error_indication.error_id), + be16_to_cpu(crq->error_indication.error_cause)); error_buff = kmalloc(sizeof(*error_buff), GFP_ATOMIC); if (!error_buff) @@ -2393,10 +2394,10 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, case PARTIALSUCCESS: dev_info(dev, "req=%lld, rsp=%ld in %s queue, retrying.\n", *req_value, - (long int)be32_to_cpu(crq->request_capability_rsp. + (long int)be64_to_cpu(crq->request_capability_rsp. number), name); release_sub_crqs_no_irqs(adapter); - *req_value = be32_to_cpu(crq->request_capability_rsp.number); + *req_value = be64_to_cpu(crq->request_capability_rsp.number); init_sub_crqs(adapter, 1); return; default: @@ -2631,12 +2632,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, break; case MIN_MTU: adapter->min_mtu = be64_to_cpu(crq->query_capability.number); - netdev->min_mtu = adapter->min_mtu; + netdev->min_mtu = adapter->min_mtu - ETH_HLEN; netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu); break; case MAX_MTU: adapter->max_mtu = be64_to_cpu(crq->query_capability.number); - netdev->max_mtu = adapter->max_mtu; + netdev->max_mtu = adapter->max_mtu - ETH_HLEN; netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu); break; case MAX_MULTICAST_FILTERS: @@ -2804,9 +2805,9 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, crq.collect_fw_trace.correlator = adapter->ras_comps[num].correlator; crq.collect_fw_trace.ioba = cpu_to_be32(trace_tok); crq.collect_fw_trace.len = adapter->ras_comps[num].trace_buff_size; - ibmvnic_send_crq(adapter, &crq); init_completion(&adapter->fw_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->fw_done); if (*ppos + len > be32_to_cpu(adapter->ras_comps[num].trace_buff_size)) @@ -3586,9 +3587,9 @@ static int ibmvnic_dump_show(struct seq_file *seq, void *v) memset(&crq, 0, sizeof(crq)); crq.request_dump_size.first = IBMVNIC_CRQ_CMD; crq.request_dump_size.cmd = REQUEST_DUMP_SIZE; - ibmvnic_send_crq(adapter, &crq); init_completion(&adapter->fw_done); + ibmvnic_send_crq(adapter, &crq); wait_for_completion(&adapter->fw_done); seq_write(seq, adapter->dump_data, adapter->dump_data_size); @@ -3634,8 +3635,8 @@ static void handle_crq_init_rsp(struct work_struct *work) } } - send_version_xchg(adapter); reinit_completion(&adapter->init_done); + send_version_xchg(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Passive init timeout\n"); goto task_failed; @@ -3645,9 +3646,9 @@ static void handle_crq_init_rsp(struct work_struct *work) if (adapter->renegotiate) { adapter->renegotiate = false; release_sub_crqs_no_irqs(adapter); - send_cap_queries(adapter); reinit_completion(&adapter->init_done); + send_cap_queries(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Passive init timeout\n"); @@ -3661,9 +3662,7 @@ static void handle_crq_init_rsp(struct work_struct *work) goto task_failed; netdev->real_num_tx_queues = adapter->req_tx_queues; - netdev->mtu = adapter->req_mtu; - netdev->min_mtu = adapter->min_mtu; - netdev->max_mtu = adapter->max_mtu; + netdev->mtu = adapter->req_mtu - ETH_HLEN; if (adapter->failover) { adapter->failover = false; @@ -3777,9 +3776,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->debugfs_dump = ent; } } - ibmvnic_send_crq_init(adapter); init_completion(&adapter->init_done); + ibmvnic_send_crq_init(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) return 0; @@ -3787,9 +3786,9 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (adapter->renegotiate) { adapter->renegotiate = false; release_sub_crqs_no_irqs(adapter); - send_cap_queries(adapter); reinit_completion(&adapter->init_done); + send_cap_queries(adapter); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) return 0; @@ -3803,7 +3802,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->real_num_tx_queues = adapter->req_tx_queues; - netdev->mtu = adapter->req_mtu; + netdev->mtu = adapter->req_mtu - ETH_HLEN; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index fdd9069b6cec..7a23d3e47c6f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -467,6 +467,22 @@ struct i40e_mac_filter { enum i40e_filter_state state; }; +/* Wrapper structure to keep track of filters while we are preparing to send + * firmware commands. We cannot send firmware commands while holding a + * spinlock, since it might sleep. To avoid this, we wrap the added filters in + * a separate structure, which will track the state change and update the real + * filter while under lock. We can't simply hold the filters in a separate + * list, as this opens a window for a race condition when adding new MAC + * addresses to all VLANs, or when adding new VLANs to all MAC addresses. + */ +struct i40e_new_mac_filter { + struct hlist_node hlist; + struct i40e_mac_filter *f; + + /* Track future changes to state separately */ + enum i40e_filter_state state; +}; + struct i40e_veb { struct i40e_pf *pf; u16 idx; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 7ca048f0b159..d570219efd9f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -174,8 +174,6 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) if (!vsi) return; - memset(¶ms, 0, sizeof(params)); - i40e_client_get_params(vsi, ¶ms); mutex_lock(&i40e_client_instance_mutex); list_for_each_entry(cdev, &i40e_client_instances, list) { if (cdev->lan_info.pf == vsi->back) { @@ -186,6 +184,8 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) "Cannot locate client instance l2_param_change routine\n"); continue; } + memset(¶ms, 0, sizeof(params)); + i40e_client_get_params(vsi, ¶ms); if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); @@ -510,9 +510,10 @@ void i40e_client_subtask(struct i40e_pf *pf) continue; if (!existing) { - dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n", + dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", client->name, pf->hw.pf_id, - pf->hw.bus.device, pf->hw.bus.func); + pf->hw.bus.bus_id, pf->hw.bus.device, + pf->hw.bus.func); } mutex_lock(&i40e_client_instance_mutex); @@ -561,8 +562,9 @@ int i40e_lan_add_device(struct i40e_pf *pf) ldev->pf = pf; INIT_LIST_HEAD(&ldev->list); list_add(&ldev->list, &i40e_devices); - dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); /* Since in some cases register may have happened before a device gets * added, we can schedule a subtask to go initiate the clients if @@ -590,9 +592,9 @@ int i40e_lan_del_device(struct i40e_pf *pf) mutex_lock(&i40e_device_mutex); list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { if (ldev->pf == pf) { - dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x func=0x%02x\n", - pf->hw.pf_id, pf->hw.bus.device, - pf->hw.bus.func); + dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n", + pf->hw.pf_id, pf->hw.bus.bus_id, + pf->hw.bus.device, pf->hw.bus.func); list_del(&ldev->list); kfree(ldev); ret = 0; @@ -653,13 +655,11 @@ static int i40e_client_release(struct i40e_client *client) * i40e_client_prepare - prepare client specific resources * @client: pointer to the registered client * - * Return 0 on success or < 0 on error **/ -static int i40e_client_prepare(struct i40e_client *client) +static void i40e_client_prepare(struct i40e_client *client) { struct i40e_device *ldev; struct i40e_pf *pf; - int ret = 0; mutex_lock(&i40e_device_mutex); list_for_each_entry(ldev, &i40e_devices, list) { @@ -669,7 +669,6 @@ static int i40e_client_prepare(struct i40e_client *client) i40e_service_event_schedule(pf); } mutex_unlock(&i40e_device_mutex); - return ret; } /** @@ -926,13 +925,9 @@ int i40e_register_client(struct i40e_client *client) set_bit(__I40E_CLIENT_REGISTERED, &client->state); mutex_unlock(&i40e_client_mutex); - if (i40e_client_prepare(client)) { - ret = -EIO; - goto out; - } + i40e_client_prepare(client); - pr_info("i40e: Registered client %s with return code %d\n", - client->name, ret); + pr_info("i40e: Registered client %s\n", client->name); out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 128735975caa..fc73e4ef27ac 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1838,6 +1838,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed; hw_link_info->link_info = resp->link_info; hw_link_info->an_info = resp->an_info; + hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | + I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; hw_link_info->loopback = resp->loopback; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c4ab3c1ae02a..a22e26200bcc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -803,9 +803,12 @@ static int i40e_set_settings(struct net_device *netdev, if (change || (abilities.link_speed != config.link_speed)) { /* copy over the rest of the abilities */ config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; /* save the requested speeds */ hw->phy.link_info.requested_speeds = config.link_speed; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9f785c015a2f..e83a8ca5dd65 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -1255,6 +1255,7 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, int vlan_filters) { struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new; struct hlist_node *h; int bkt, new_vlan; @@ -1273,13 +1274,13 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, */ /* Update the filters about to be added in place */ - hlist_for_each_entry(f, tmp_add_list, hlist) { - if (vsi->info.pvid && f->vlan != vsi->info.pvid) - f->vlan = vsi->info.pvid; - else if (vlan_filters && f->vlan == I40E_VLAN_ANY) - f->vlan = 0; - else if (!vlan_filters && f->vlan == 0) - f->vlan = I40E_VLAN_ANY; + hlist_for_each_entry(new, tmp_add_list, hlist) { + if (vsi->info.pvid && new->f->vlan != vsi->info.pvid) + new->f->vlan = vsi->info.pvid; + else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY) + new->f->vlan = 0; + else if (!vlan_filters && new->f->vlan == 0) + new->f->vlan = I40E_VLAN_ANY; } /* Update the remaining active filters */ @@ -1305,9 +1306,16 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, if (!add_head) return -ENOMEM; - /* Put the replacement filter into the add list */ - hash_del(&add_head->hlist); - hlist_add_head(&add_head->hlist, tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + return -ENOMEM; + + new->f = add_head; + new->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new->hlist, tmp_add_list); /* Put the original filter into the delete list */ f->state = I40E_FILTER_REMOVE; @@ -1819,16 +1827,15 @@ static void i40e_set_rx_mode(struct net_device *netdev) } /** - * i40e_undo_filter_entries - Undo the changes made to MAC filter entries + * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * - * MAC filter entries from list were slated to be sent to firmware, either for - * addition or deletion. + * MAC filter entries from this list were slated for deletion. **/ -static void i40e_undo_filter_entries(struct i40e_vsi *vsi, - struct hlist_head *from) +static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -1843,6 +1850,53 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, } /** + * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries + * @vsi: Pointer to vsi struct + * @from: Pointer to list which contains MAC filter entries - changes to + * those entries needs to be undone. + * + * MAC filter entries from this list were slated for addition. + **/ +static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, + struct hlist_head *from) +{ + struct i40e_new_mac_filter *new; + struct hlist_node *h; + + hlist_for_each_entry_safe(new, h, from, hlist) { + /* We can simply free the wrapper structure */ + hlist_del(&new->hlist); + kfree(new); + } +} + +/** + * i40e_next_entry - Get the next non-broadcast filter from a list + * @next: pointer to filter in list + * + * Returns the next non-broadcast filter in the list. Required so that we + * ignore broadcast filters within the list, since these are not handled via + * the normal firmware update path. + */ +static +struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next) +{ + while (next) { + next = hlist_entry(next->hlist.next, + typeof(struct i40e_new_mac_filter), + hlist); + + /* keep going if we found a broadcast filter */ + if (next && is_broadcast_ether_addr(next->f->macaddr)) + continue; + + break; + } + + return next; +} + +/** * i40e_update_filter_state - Update filter state based on return data * from firmware * @count: Number of filters added @@ -1855,7 +1909,7 @@ static void i40e_undo_filter_entries(struct i40e_vsi *vsi, static int i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, - struct i40e_mac_filter *add_head) + struct i40e_new_mac_filter *add_head) { int retval = 0; int i; @@ -1874,9 +1928,9 @@ i40e_update_filter_state(int count, retval++; } - add_head = hlist_entry(add_head->hlist.next, - typeof(struct i40e_mac_filter), - hlist); + add_head = i40e_next_filter(add_head); + if (!add_head) + break; } return retval; @@ -1933,7 +1987,7 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, - struct i40e_mac_filter *add_head, + struct i40e_new_mac_filter *add_head, int num_add, bool *promisc_changed) { struct i40e_hw *hw = &vsi->back->hw; @@ -1961,10 +2015,12 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. + * + * Returns status indicating success or failure; **/ -static -void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, - struct i40e_mac_filter *f) +static i40e_status +i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, + struct i40e_mac_filter *f) { bool enable = f->state == I40E_FILTER_NEW; struct i40e_hw *hw = &vsi->back->hw; @@ -1983,15 +2039,13 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) { + if (aq_ret) dev_warn(&vsi->back->pdev->dev, "Error %s setting broadcast promiscuous mode on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); - f->state = I40E_FILTER_FAILED; - } else if (enable) { - f->state = I40E_FILTER_ACTIVE; - } + + return aq_ret; } /** @@ -2005,7 +2059,8 @@ void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, int i40e_sync_vsi_filters(struct i40e_vsi *vsi) { struct hlist_head tmp_add_list, tmp_del_list; - struct i40e_mac_filter *f, *add_head = NULL; + struct i40e_mac_filter *f; + struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; @@ -2059,8 +2114,17 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) continue; } if (f->state == I40E_FILTER_NEW) { - hash_del(&f->hlist); - hlist_add_head(&f->hlist, &tmp_add_list); + /* Create a temporary i40e_new_mac_filter */ + new = kzalloc(sizeof(*new), GFP_ATOMIC); + if (!new) + goto err_no_memory_locked; + + /* Store pointer to the real filter */ + new->f = f; + new->state = f->state; + + /* Add it to the hash list */ + hlist_add_head(&new->hlist, &tmp_add_list); } /* Count the number of active (current and new) VLAN @@ -2095,7 +2159,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) cmd_flags = 0; /* handle broadcast filters by updating the broadcast - * promiscuous flag instead of deleting a MAC filter. + * promiscuous flag and release filter list. */ if (is_broadcast_ether_addr(f->macaddr)) { i40e_aqc_broadcast_filter(vsi, vsi_name, f); @@ -2153,36 +2217,37 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto err_no_memory; num_add = 0; - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state)) { - f->state = I40E_FILTER_FAILED; + new->state = I40E_FILTER_FAILED; continue; } /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ - if (is_broadcast_ether_addr(f->macaddr)) { - u64 key = i40e_addr_to_hkey(f->macaddr); - i40e_aqc_broadcast_filter(vsi, vsi_name, f); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + if (is_broadcast_ether_addr(new->f->macaddr)) { + if (i40e_aqc_broadcast_filter(vsi, vsi_name, + new->f)) + new->state = I40E_FILTER_FAILED; + else + new->state = I40E_FILTER_ACTIVE; continue; } /* add to add array */ if (num_add == 0) - add_head = f; + add_head = new; cmd_flags = 0; - ether_addr_copy(add_list[num_add].mac_addr, f->macaddr); - if (f->vlan == I40E_VLAN_ANY) { + ether_addr_copy(add_list[num_add].mac_addr, + new->f->macaddr); + if (new->f->vlan == I40E_VLAN_ANY) { add_list[num_add].vlan_tag = 0; cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { add_list[num_add].vlan_tag = - cpu_to_le16((u16)(f->vlan)); + cpu_to_le16((u16)(new->f->vlan)); } add_list[num_add].queue_number = 0; /* set invalid match method for later detection */ @@ -2208,11 +2273,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) * the VSI's list. */ spin_lock_bh(&vsi->mac_filter_hash_lock); - hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) { - u64 key = i40e_addr_to_hkey(f->macaddr); - - hlist_del(&f->hlist); - hash_add(vsi->mac_filter_hash, &f->hlist, key); + hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { + /* Only update the state if we're still NEW */ + if (new->f->state == I40E_FILTER_NEW) + new->f->state = new->state; + hlist_del(&new->hlist); + kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); kfree(add_list); @@ -2373,8 +2439,8 @@ err_no_memory: /* Restore elements on the temporary add and delete lists */ spin_lock_bh(&vsi->mac_filter_hash_lock); err_no_memory_locked: - i40e_undo_filter_entries(vsi, &tmp_del_list); - i40e_undo_filter_entries(vsi, &tmp_add_list); + i40e_undo_del_filter_entries(vsi, &tmp_del_list); + i40e_undo_add_filter_entries(vsi, &tmp_add_list); spin_unlock_bh(&vsi->mac_filter_hash_lock); vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; @@ -5272,6 +5338,8 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) enum i40e_aq_link_speed new_speed; char *speed = "Unknown"; char *fc = "Unknown"; + char *fec = ""; + char *an = ""; new_speed = vsi->back->hw.phy.link_info.link_speed; @@ -5331,8 +5399,23 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - netdev_info(vsi->netdev, "NIC Link is Up %sbps Full Duplex, Flow Control: %s\n", - speed, fc); + if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + fec = ", FEC: None"; + an = ", Autoneg: False"; + + if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + an = ", Autoneg: True"; + + if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_KR_ENA) + fec = ", FEC: CL74 FC-FEC/BASE-R"; + else if (vsi->back->hw.phy.link_info.fec_info & + I40E_AQ_CONFIG_FEC_RS_ENA) + fec = ", FEC: CL108 RS-FEC"; + } + + netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex%s%s, Flow Control: %s\n", + speed, fec, an, fc); } /** @@ -10990,6 +11073,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; /* set up the locks for the AQ, do this only once in probe diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h index be74bcf9c961..fea81ed065db 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_osdep.h +++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h @@ -69,12 +69,12 @@ struct i40e_virt_mem { #define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s) #define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m) -#define i40e_debug(h, m, s, ...) \ -do { \ - if (((m) & (h)->debug_mask)) \ - pr_info("i40e %02x.%x " s, \ - (h)->bus.device, (h)->bus.func, \ - ##__VA_ARGS__); \ +#define i40e_debug(h, m, s, ...) \ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("i40e %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ } while (0) typedef enum i40e_status_code i40e_status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 9e49ffafce28..2caee35528fa 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -280,7 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - int i; + unsigned int i, cleared = 0; /* Since we cannot turn off the Rx timestamp logic if the device is * configured for Tx timestamping, we check if Rx timestamping is @@ -306,14 +306,25 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi) time_is_before_jiffies(pf->latch_events[i] + HZ)) { rd32(hw, I40E_PRTTSYN_RXTIME_H(i)); pf->latch_event_flags &= ~BIT(i); - pf->rx_hwtstamp_cleared++; - dev_warn(&pf->pdev->dev, - "Clearing a missed Rx timestamp event for RXTIME[%d]\n", - i); + cleared++; } } spin_unlock_bh(&pf->ptp_rx_lock); + + /* Log a warning if more than 2 timestamps got dropped in the same + * check. We don't want to warn about all drops because it can occur + * in normal scenarios such as PTP frames on multicast addresses we + * aren't listening to. However, administrator should know if this is + * the reason packets aren't receiving timestamps. + */ + if (cleared > 2) + dev_dbg(&pf->pdev->dev, + "Dropped %d missed RXTIME timestamp events\n", + cleared); + + /* Finally, update the rx_hwtstamp_cleared counter */ + pf->rx_hwtstamp_cleared += cleared; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f5baeb154d39..09f09ea7a5e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -432,7 +432,12 @@ unsupported_flow: ret = -EINVAL; } - /* The buffer allocated here is freed by the i40e_clean_tx_ring() */ + /* The buffer allocated here will be normally be freed by + * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit + * completion. In the event of an error adding the buffer to the FDIR + * ring, it will immediately be freed. It may also be freed by + * i40e_clean_tx_ring() when closing the VSI. + */ return ret; } @@ -1013,14 +1018,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -1425,45 +1431,6 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -1478,10 +1445,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -1513,19 +1476,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1538,30 +1567,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1569,34 +1597,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1611,18 +1631,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1646,19 +1669,17 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1700,7 +1721,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, #define staterrlen rx_desc->wb.qword1.status_error_len if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) { i40e_clean_programming_status(rx_ring, rx_desc); - rx_ring->rx_bi[ntc].skb = skb; return true; } /* if we are the last buffer then there is nothing else to do */ @@ -1708,8 +1728,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1730,12 +1748,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1764,7 +1782,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40e_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40e_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1783,8 +1801,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1809,11 +1829,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 1ea820e9debe..f80979025c01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -253,7 +253,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned int page_offset; @@ -354,6 +353,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40e_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index b6cf8d2670a4..939f9fdc8f85 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -184,6 +184,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -469,6 +470,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index d4e488267988..b758846d4dc5 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -501,14 +501,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_ring->rx_bi) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i]; - if (rx_bi->skb) { - dev_kfree_skb(rx_bi->skb); - rx_bi->skb = NULL; - } if (!rx_bi->page) continue; @@ -903,45 +904,6 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring, } /** - * i40e_pull_tail - i40e specific version of skb_pull_tail - * @rx_ring: rx descriptor ring packet is being transacted on - * @skb: pointer to current skb being adjusted - * - * This function is an i40e specific version of __pskb_pull_tail. The - * main difference between this version and the original function is that - * this function can make several assumptions about the state of things - * that allow for significant optimizations versus the standard function. - * As a result we can do things like drop a frag and maintain an accurate - * truesize for the skb. - */ -static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) -{ - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned char *va; - unsigned int pull_len; - - /* it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; -} - -/** * i40e_cleanup_headers - Correct empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being fixed @@ -956,10 +918,6 @@ static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb) **/ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) { - /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) - i40e_pull_tail(rx_ring, skb); - /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) return true; @@ -991,19 +949,85 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, } /** - * i40e_page_is_reserved - check if reuse is possible + * i40e_page_is_reusable - check if any reuse is possible * @page: page struct to check + * + * A page is not reusable if it was allocated under low memory + * conditions, or it's not in the same NUMA node as this CPU. */ -static inline bool i40e_page_is_reserved(struct page *page) +static inline bool i40e_page_is_reusable(struct page *page) { - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); + return (page_to_nid(page) == numa_mem_id()) && + !page_is_pfmemalloc(page); +} + +/** + * i40e_can_reuse_rx_page - Determine if this page can be reused by + * the adapter for another receive + * + * @rx_buffer: buffer containing the page + * @page: page address from rx_buffer + * @truesize: actual size of the buffer in this page + * + * If page is reusable, rx_buffer->page_offset is adjusted to point to + * an unused region in the page. + * + * For small pages, @truesize will be a constant value, half the size + * of the memory at page. We'll attempt to alternate between high and + * low halves of the page, with one half ready for use by the hardware + * and the other half being consumed by the stack. We use the page + * ref count to determine whether the stack has finished consuming the + * portion of this page that was passed up with a previous packet. If + * the page ref count is >1, we'll assume the "other" half page is + * still busy, and this page cannot be reused. + * + * For larger pages, @truesize will be the actual space used by the + * received packet (adjusted upward to an even multiple of the cache + * line size). This will advance through the page by the amount + * actually consumed by the received packets while there is still + * space for a buffer. Each region of larger pages will be used at + * most once, after which the page will not be reused. + * + * In either case, if the page is reusable its refcount is increased. + **/ +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + struct page *page, + const unsigned int truesize) +{ +#if (PAGE_SIZE >= 8192) + unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; +#endif + + /* Is any reuse possible? */ + if (unlikely(!i40e_page_is_reusable(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; +#endif + + /* Inc ref count on page before passing it up to the stack */ + get_page(page); + + return true; } /** * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware + * @size: packet length from rx_desc * @skb: sk_buff to place the data into * * This function will add the data contained in rx_buffer->page to the skb. @@ -1016,30 +1040,29 @@ static inline bool i40e_page_is_reserved(struct page *page) **/ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - union i40e_rx_desc *rx_desc, + unsigned int size, struct sk_buff *skb) { struct page *page = rx_buffer->page; - u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) unsigned int truesize = I40E_RXBUFFER_2048; #else unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; #endif + unsigned int pull_len; + + if (unlikely(skb_is_nonlinear(skb))) + goto add_tail_frag; /* will the data fit in the skb we allocated? if so, just * copy it as it is pretty small anyway */ - if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - + if (size <= I40E_RX_HDR_SIZE) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!i40e_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (likely(i40e_page_is_reusable(page))) return true; /* this page cannot be reused so discard it */ @@ -1047,34 +1070,26 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, return false; } - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(i40e_page_is_reserved(page))) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; + /* we need the header to contain the greater of either + * ETH_HLEN or 60 bytes if the skb->len is less than + * 60 for skb_pad. + */ + pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; -#else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; + /* align pull length to size of long to optimize + * memcpy performance + */ + memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - if (rx_buffer->page_offset > last_offset) - return false; -#endif + /* update all of the pointers */ + va += pull_len; + size -= pull_len; - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - get_page(rx_buffer->page); +add_tail_frag: + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + (unsigned long)va & ~PAGE_MASK, size, truesize); - return true; + return i40e_can_reuse_rx_page(rx_buffer, page, truesize); } /** @@ -1089,18 +1104,21 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, */ static inline struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc) + union i40e_rx_desc *rx_desc, + struct sk_buff *skb) { + u64 local_status_error_len = + le64_to_cpu(rx_desc->wb.qword1.status_error_len); + unsigned int size = + (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; struct i40e_rx_buffer *rx_buffer; - struct sk_buff *skb; struct page *page; rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; page = rx_buffer->page; prefetchw(page); - skb = rx_buffer->skb; - if (likely(!skb)) { void *page_addr = page_address(page) + rx_buffer->page_offset; @@ -1124,19 +1142,17 @@ struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, * it now to avoid a possible cache miss */ prefetchw(skb->data); - } else { - rx_buffer->skb = NULL; } /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - I40E_RXBUFFER_2048, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; @@ -1180,8 +1196,6 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_bi[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1202,12 +1216,12 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; + struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); bool failure = false; while (likely(total_rx_packets < budget)) { union i40e_rx_desc *rx_desc; - struct sk_buff *skb; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1236,7 +1250,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ dma_rmb(); - skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc); + skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc, skb); if (!skb) break; @@ -1255,8 +1269,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) continue; } - if (i40e_cleanup_headers(rx_ring, skb)) + if (i40e_cleanup_headers(rx_ring, skb)) { + skb = NULL; continue; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; @@ -1273,11 +1289,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; i40e_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_packets++; } + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index a5fc789f78eb..8274ba68bd32 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -239,7 +239,6 @@ struct i40e_tx_buffer { }; struct i40e_rx_buffer { - struct sk_buff *skb; dma_addr_t dma; struct page *page; unsigned int page_offset; @@ -340,6 +339,14 @@ struct i40e_ring { struct rcu_head rcu; /* to avoid race on free */ u16 next_to_alloc; + struct sk_buff *skb; /* When i40evf_clean_rx_ring_irq() must + * return before it sees the EOP for + * the current packet, we save that skb + * here and resume receiving this + * packet the next time + * i40evf_clean_rx_ring_irq() is called + * for this ring. + */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 92ac60da5201..16bb88084bb9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -158,6 +158,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -442,6 +443,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index fc374f833aa9..d38a2b2aea2b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -81,6 +81,7 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index fffe4cf2c20b..00c42d803276 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -195,6 +195,7 @@ struct i40evf_adapter { u64 hw_csum_rx_error; u32 rx_desc_count; int num_msix_vectors; + u32 client_pending; struct msix_entry *msix_entries; u32 flags; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 3fe87e021148..920c1cb06a92 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 25 +#define DRV_VERSION_BUILD 27 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -2726,6 +2726,7 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->subsystem_device_id = pdev->subsystem_device; hw->bus.device = PCI_SLOT(pdev->devfn); hw->bus.func = PCI_FUNC(pdev->devfn); + hw->bus.bus_id = pdev->bus->number; /* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2059a8e88908..bee58af390e1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -999,6 +999,10 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (v_opcode != adapter->current_op) return; break; + case I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP: + adapter->client_pending &= + ~(BIT(I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP)); + break; case I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS: { struct i40e_virtchnl_rss_hena *vrh = (struct i40e_virtchnl_rss_hena *)msg; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index e83444c34cf9..a2cc43d28888 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -91,6 +91,14 @@ #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ +#define IXGBE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#if (PAGE_SIZE < 8192) +#define IXGBE_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K) - IXGBE_SKB_PAD) +#else +#define IGB_MAX_FRAME_BUILD_SKB IXGBE_RXBUFFER_2K +#endif + /* * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we * reserve 64 more, and skb_shared_info adds an additional 320 bytes more, @@ -104,6 +112,9 @@ /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */ +#define IXGBE_RX_DMA_ATTR \ + (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + enum ixgbe_tx_flags { /* cmd_type flags */ IXGBE_TX_FLAGS_HW_VLAN = 0x01, @@ -192,7 +203,12 @@ struct ixgbe_rx_buffer { struct sk_buff *skb; dma_addr_t dma; struct page *page; - unsigned int page_offset; +#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) + __u32 page_offset; +#else + __u16 page_offset; +#endif + __u16 pagecnt_bias; }; struct ixgbe_queue_stats { @@ -218,15 +234,20 @@ struct ixgbe_rx_queue_stats { #define IXGBE_TS_HDR_LEN 8 enum ixgbe_ring_state_t { + __IXGBE_RX_3K_BUFFER, + __IXGBE_RX_BUILD_SKB_ENABLED, + __IXGBE_RX_RSC_ENABLED, + __IXGBE_RX_CSUM_UDP_ZERO_ERR, + __IXGBE_RX_FCOE, __IXGBE_TX_FDIR_INIT_DONE, __IXGBE_TX_XPS_INIT_DONE, __IXGBE_TX_DETECT_HANG, __IXGBE_HANG_CHECK_ARMED, - __IXGBE_RX_RSC_ENABLED, - __IXGBE_RX_CSUM_UDP_ZERO_ERR, - __IXGBE_RX_FCOE, }; +#define ring_uses_build_skb(ring) \ + test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &(ring)->state) + struct ixgbe_fwd_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct net_device *netdev; @@ -336,19 +357,20 @@ struct ixgbe_ring_feature { */ static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { -#ifdef IXGBE_FCOE - if (test_bit(__IXGBE_RX_FCOE, &ring->state)) - return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : - IXGBE_RXBUFFER_3K; + if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + return IXGBE_RXBUFFER_3K; +#if (PAGE_SIZE < 8192) + if (ring_uses_build_skb(ring)) + return IXGBE_MAX_FRAME_BUILD_SKB; #endif return IXGBE_RXBUFFER_2K; } static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) { -#ifdef IXGBE_FCOE - if (test_bit(__IXGBE_RX_FCOE, &ring->state)) - return (PAGE_SIZE < 8192) ? 1 : 0; +#if (PAGE_SIZE < 8192) + if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + return 1; #endif return 0; } @@ -539,6 +561,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) +#define IXGBE_FLAG2_RX_LEGACY BIT(16) /* Tx fast path data */ int num_tx_queues; @@ -751,7 +774,7 @@ extern const struct ixgbe_info ixgbe_X550EM_x_info; extern const struct ixgbe_info ixgbe_x550em_a_info; extern const struct ixgbe_info ixgbe_x550em_a_fw_info; #ifdef CONFIG_IXGBE_DCB -extern const struct dcbnl_rtnl_ops dcbnl_ops; +extern const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops; #endif extern char ixgbe_driver_name[]; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index b8fc3cfec831..78c52375acc6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -777,7 +777,7 @@ static u8 ixgbe_dcbnl_setdcbx(struct net_device *dev, u8 mode) return err ? 1 : 0; } -const struct dcbnl_rtnl_ops dcbnl_ops = { +const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops = { .ieee_getets = ixgbe_dcbnl_ieee_getets, .ieee_setets = ixgbe_dcbnl_ieee_setets, .ieee_getpfc = ixgbe_dcbnl_ieee_getpfc, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index ee28e54a4f75..a7574c7b12af 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -151,6 +151,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { }; #define IXGBE_TEST_LEN sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN +static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define IXGBE_PRIV_FLAGS_LEGACY_RX BIT(0) + "legacy-rx", +}; + +#define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings) + /* currently supported speeds for 10G */ #define ADVRTSD_MSK_10G (SUPPORTED_10000baseT_Full | \ SUPPORTED_10000baseKX4_Full | \ @@ -340,6 +347,9 @@ static int ixgbe_get_settings(struct net_device *netdev, case IXGBE_LINK_SPEED_10GB_FULL: ethtool_cmd_speed_set(ecmd, SPEED_10000); break; + case IXGBE_LINK_SPEED_5GB_FULL: + ethtool_cmd_speed_set(ecmd, SPEED_5000); + break; case IXGBE_LINK_SPEED_2_5GB_FULL: ethtool_cmd_speed_set(ecmd, SPEED_2500); break; @@ -998,6 +1008,8 @@ static void ixgbe_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); + + drvinfo->n_priv_flags = IXGBE_PRIV_FLAGS_STR_LEN; } static void ixgbe_get_ringparam(struct net_device *netdev, @@ -1137,6 +1149,8 @@ static int ixgbe_get_sset_count(struct net_device *netdev, int sset) return IXGBE_TEST_LEN; case ETH_SS_STATS: return IXGBE_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return IXGBE_PRIV_FLAGS_STR_LEN; default: return -EOPNOTSUPP; } @@ -1261,6 +1275,9 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, } /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, ixgbe_priv_flags_strings, + IXGBE_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); } } @@ -1865,7 +1882,7 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, tx_ntc = tx_ring->next_to_clean; rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc); - while (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) { + while (rx_desc->wb.upper.length) { /* check Rx buffer */ rx_buffer = &rx_ring->rx_buffer_info[rx_ntc]; @@ -1887,7 +1904,16 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring, /* unmap buffer on Tx side */ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); + + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); /* increment Rx/Tx next to clean counters */ rx_ntc++; @@ -3342,6 +3368,37 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata) return 0; } +static u32 ixgbe_get_priv_flags(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX; + + return priv_flags; +} + +static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + unsigned int flags2 = adapter->flags2; + + flags2 &= ~IXGBE_FLAG2_RX_LEGACY; + if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) + flags2 |= IXGBE_FLAG2_RX_LEGACY; + + if (flags2 != adapter->flags2) { + adapter->flags2 = flags2; + + /* reset interface to repopulate queues */ + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + } + + return 0; +} + static const struct ethtool_ops ixgbe_ethtool_ops = { .get_settings = ixgbe_get_settings, .set_settings = ixgbe_set_settings, @@ -3378,6 +3435,8 @@ static const struct ethtool_ops ixgbe_ethtool_ops = { .set_eee = ixgbe_set_eee, .get_channels = ixgbe_get_channels, .set_channels = ixgbe_set_channels, + .get_priv_flags = ixgbe_get_priv_flags, + .set_priv_flags = ixgbe_set_priv_flags, .get_ts_info = ixgbe_get_ts_info, .get_module_info = ixgbe_get_module_info, .get_module_eeprom = ixgbe_get_module_eeprom, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 86135c00d4b1..060cdce8058f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -72,7 +72,7 @@ char ixgbe_default_device_descr[] = static char ixgbe_default_device_descr[] = "Intel(R) 10 Gigabit Network Connection"; #endif -#define DRV_VERSION "4.4.0-k" +#define DRV_VERSION "5.0.0-k" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2016 Intel Corporation."; @@ -945,28 +945,6 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, } } -void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *ring, - struct ixgbe_tx_buffer *tx_buffer) -{ - if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); - if (dma_unmap_len(tx_buffer, len)) - dma_unmap_single(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } else if (dma_unmap_len(tx_buffer, len)) { - dma_unmap_page(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - } - tx_buffer->next_to_watch = NULL; - tx_buffer->skb = NULL; - dma_unmap_len_set(tx_buffer, len, 0); - /* tx_buffer must be completely set up in the transmit path */ -} - static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1198,7 +1176,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, DMA_TO_DEVICE); /* clear tx_buffer data */ - tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); /* unmap remaining buffers */ @@ -1552,6 +1529,11 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, } } +static inline unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0; +} + static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *bi) { @@ -1570,8 +1552,10 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); /* * if mapping failed free memory back to system since @@ -1586,7 +1570,8 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ixgbe_rx_offset(rx_ring); + bi->pagecnt_bias = 1; return true; } @@ -1601,6 +1586,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; u16 i = rx_ring->next_to_use; + u16 bufsz; /* nothing to do */ if (!cleaned_count) @@ -1610,10 +1596,17 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; + bufsz = ixgbe_rx_bufsz(rx_ring); + do { if (!ixgbe_alloc_mapped_page(rx_ring, bi)) break; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, bufsz, + DMA_FROM_DEVICE); + /* * Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. @@ -1629,8 +1622,8 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.upper.status_error = 0; + /* clear the length for the next_to_use descriptor */ + rx_desc->wb.upper.length = 0; cleaned_count--; } while (cleaned_count); @@ -1832,19 +1825,19 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, { /* if the page was released unmap it, else just sync our portion */ if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; + dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); } else { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, frag->page_offset, - ixgbe_rx_bufsz(rx_ring), + skb_frag_size(frag), DMA_FROM_DEVICE); } - IXGBE_CB(skb)->dma = 0; } /** @@ -1880,7 +1873,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /* place header in linear portion of buffer */ - if (skb_is_nonlinear(skb)) + if (!skb_headlen(skb)) ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE @@ -1915,14 +1908,14 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, nta++; rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - /* transfer page from old buffer to new buffer */ - *new_buff = *old_buff; - - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* Transfer page from old buffer to new buffer. + * Move each member individually to avoid possible store + * forwarding stalls and unnecessary copy of skb. + */ + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; } static inline bool ixgbe_page_is_reserved(struct page *page) @@ -1930,6 +1923,43 @@ static inline bool ixgbe_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } +static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) +{ + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; + + /* avoid re-using remote pages */ + if (unlikely(ixgbe_page_is_reserved(page))) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + return false; +#else + /* The last offset is a bit aggressive in that we assume the + * worst case of FCoE being enabled and using a 3K buffer. + * However this should have minimal impact as the 1K extra is + * still less than one buffer in size. + */ +#define IXGBE_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K) + if (rx_buffer->page_offset > IXGBE_LAST_OFFSET) + return false; +#endif + + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } + + return true; +} + /** * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -1945,144 +1975,172 @@ static inline bool ixgbe_page_is_reserved(struct page *page) * The function will then update the page offset if necessary and return * true if the buffer can be reused by the adapter. **/ -static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbe_rx_bufsz(rx_ring); + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); - unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - - ixgbe_rx_bufsz(rx_ring); + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) : + SKB_DATA_ALIGN(size); #endif - - if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { - unsigned char *va = page_address(page) + rx_buffer->page_offset; - - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!ixgbe_page_is_reserved(page))) - return true; - - /* this page cannot be reused so discard it */ - __free_pages(page, ixgbe_rx_pg_order(rx_ring)); - return false; - } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); - - /* avoid re-using remote pages */ - if (unlikely(ixgbe_page_is_reserved(page))) - return false; - #if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) - return false; - - /* flip page offset to other buffer */ rx_buffer->page_offset ^= truesize; #else - /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; - - if (rx_buffer->page_offset > last_offset) - return false; #endif - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - page_ref_inc(page); - - return true; } -static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) +static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff **skb, + const unsigned int size) { struct ixgbe_rx_buffer *rx_buffer; - struct sk_buff *skb; - struct page *page; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); + prefetchw(rx_buffer->page); + *skb = rx_buffer->skb; - skb = rx_buffer->skb; + /* Delay unmapping of the first packet. It carries the header + * information, HW may still access the header after the writeback. + * Only unmap it when EOP is reached + */ + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) { + if (!*skb) + goto skip_sync; + } else { + if (*skb) + ixgbe_dma_sync_frag(rx_ring, *skb); + } - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); +skip_sync: + rx_buffer->pagecnt_bias--; - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + return rx_buffer; +} - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return NULL; +static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + struct sk_buff *skb) +{ + if (ixgbe_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else { + if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); } + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + rx_buffer->skb = NULL; +} - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. Only unmap it when EOP is - * reached - */ - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - goto dma_sync; +static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + struct sk_buff *skb; - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) - ixgbe_dma_sync_frag(rx_ring, skb); + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif -dma_sync: - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; - rx_buffer->skb = NULL; - } + if (size > IXGBE_RX_HDR_SIZE) { + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + IXGBE_CB(skb)->dma = rx_buffer->dma; - /* pull page into skb */ - if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; + skb_add_rx_frag(skb, 0, rx_buffer->page, + rx_buffer->page_offset, + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + rx_buffer->pagecnt_bias++; } - /* clear contents of buffer_info */ - rx_buffer->page = NULL; + return skb; +} + +static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IXGBE_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb to around the page buffer */ + skb = build_skb(va - IXGBE_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IXGBE_SKB_PAD); + __skb_put(skb, size); + + /* record DMA address if this is the start of a chain of buffers */ + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + IXGBE_CB(skb)->dma = rx_buffer->dma; + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif return skb; } @@ -2114,7 +2172,9 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; + struct ixgbe_rx_buffer *rx_buffer; struct sk_buff *skb; + unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -2123,8 +2183,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!rx_desc->wb.upper.status_error) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading @@ -2133,13 +2193,26 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); + rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); + /* retrieve a buffer from the ring */ - skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); + if (skb) + ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = ixgbe_build_skb(rx_ring, rx_buffer, + rx_desc, size); + else + skb = ixgbe_construct_skb(rx_ring, rx_buffer, + rx_desc, size); /* exit if we failed to retrieve a buffer */ - if (!skb) + if (!skb) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_buffer->pagecnt_bias++; break; + } + ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb); cleaned_count++; /* place incomplete frames back on ring for completion */ @@ -3197,6 +3270,10 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state); + /* reinitialize tx_buffer_info */ + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbe_tx_buffer) * ring->count); + /* enable queue */ IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl); @@ -3367,7 +3444,10 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ - srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) + srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -3668,6 +3748,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; + union ixgbe_adv_rx_desc *rx_desc; u64 rdba = ring->dma; u32 rxdctl; u8 reg_idx = ring->reg_idx; @@ -3700,8 +3781,27 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; +#if (PAGE_SIZE < 8192) + } else { + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | + IXGBE_RXDCTL_RLPML_EN); + + /* Limit the maximum frame size so we don't overrun the skb */ + if (ring_uses_build_skb(ring) && + !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state)) + rxdctl |= IXGBE_MAX_FRAME_BUILD_SKB | + IXGBE_RXDCTL_RLPML_EN; +#endif } + /* initialize rx_buffer_info */ + memset(ring->rx_buffer_info, 0, + sizeof(struct ixgbe_rx_buffer) * ring->count); + + /* initialize Rx descriptor 0 */ + rx_desc = IXGBE_RX_DESC(ring, 0); + rx_desc->wb.upper.length = 0; + /* enable receive descriptor ring */ rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl); @@ -3838,10 +3938,30 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; + + clear_ring_rsc_enabled(rx_ring); + clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); - else - clear_ring_rsc_enabled(rx_ring); + + if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state)) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + + clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + continue; + + set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state); + +#if (PAGE_SIZE < 8192) + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); + + if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) + set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state); +#endif } } @@ -4855,45 +4975,47 @@ static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) **/ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) { - struct device *dev = rx_ring->dev; - unsigned long size; - u16 i; - - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buffer_info) - return; + u16 i = rx_ring->next_to_clean; + struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i]; - + while (i != rx_ring->next_to_alloc) { if (rx_buffer->skb) { struct sk_buff *skb = rx_buffer->skb; if (IXGBE_CB(skb)->page_released) - dma_unmap_page(dev, - IXGBE_CB(skb)->dma, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + dma_unmap_page_attrs(rx_ring->dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); dev_kfree_skb(skb); - rx_buffer->skb = NULL; } - if (!rx_buffer->page) - continue; + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); - dma_unmap_page(dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - __free_pages(rx_buffer->page, ixgbe_rx_pg_order(rx_ring)); + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); - rx_buffer->page = NULL; + i++; + rx_buffer++; + if (i == rx_ring->count) { + i = 0; + rx_buffer = rx_ring->rx_buffer_info; + } } - size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -5362,28 +5484,57 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) **/ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) { - struct ixgbe_tx_buffer *tx_buffer_info; - unsigned long size; - u16 i; + u16 i = tx_ring->next_to_clean; + struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; - /* ring already cleared, nothing to do */ - if (!tx_ring->tx_buffer_info) - return; + while (i != tx_ring->next_to_use) { + union ixgbe_adv_tx_desc *eop_desc, *tx_desc; - /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - } + /* Free all the Tx ring sk_buffs */ + dev_kfree_skb_any(tx_buffer->skb); - netdev_tx_reset_queue(txring_txq(tx_ring)); + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); - size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; - memset(tx_ring->tx_buffer_info, 0, size); + /* check for eop_desc to determine the end of the packet */ + eop_desc = tx_buffer->next_to_watch; + tx_desc = IXGBE_TX_DESC(tx_ring, i); + + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + i++; + if (unlikely(i == tx_ring->count)) { + i = 0; + tx_buffer = tx_ring->tx_buffer_info; + } + } - /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); + /* reset BQL for queue */ + netdev_tx_reset_queue(txring_txq(tx_ring)); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; } @@ -5829,9 +5980,9 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) if (tx_ring->q_vector) ring_node = tx_ring->q_vector->numa_node; - tx_ring->tx_buffer_info = vzalloc_node(size, ring_node); + tx_ring->tx_buffer_info = vmalloc_node(size, ring_node); if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -5913,9 +6064,9 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) if (rx_ring->q_vector) ring_node = rx_ring->q_vector->numa_node; - rx_ring->rx_buffer_info = vzalloc_node(size, ring_node); + rx_ring->rx_buffer_info = vmalloc_node(size, ring_node); if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + rx_ring->rx_buffer_info = vmalloc(size); if (!rx_ring->rx_buffer_info) goto err; @@ -7630,18 +7781,32 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, return; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); + tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ - for (;;) { + while (tx_buffer != first) { + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + if (i--) + i += tx_ring->count; tx_buffer = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); - if (tx_buffer == first) - break; - if (i == 0) - i = tx_ring->count; - i--; } + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + + dev_kfree_skb_any(first->skb); + first->skb = NULL; + tx_ring->next_to_use = i; } @@ -9687,7 +9852,7 @@ skip_sriov: #ifdef CONFIG_IXGBE_DCB if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) - netdev->dcbnl_ops = &dcbnl_ops; + netdev->dcbnl_ops = &ixgbe_dcbnl_ops; #endif #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 2fcde8777a29..e55b2602f371 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -768,9 +768,7 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, ixgbe_link_speed speed, bool autoneg_wait_to_complete) { - - /* - * Clear autoneg_advertised and set new values based on input link + /* Clear autoneg_advertised and set new values based on input link * speed. */ hw->phy.autoneg_advertised = 0; @@ -778,6 +776,12 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_10GB_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + if (speed & IXGBE_LINK_SPEED_5GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_5GB_FULL; + + if (speed & IXGBE_LINK_SPEED_2_5GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_2_5GB_FULL; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index fdf71720e707..61dd4462411c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3959,7 +3959,7 @@ static const struct net_device_ops mvneta_netdev_ops = { .ndo_do_ioctl = mvneta_ioctl, }; -const struct ethtool_ops mvneta_eth_tool_ops = { +static const struct ethtool_ops mvneta_eth_tool_ops = { .nway_reset = phy_ethtool_nway_reset, .get_link = ethtool_op_get_link, .set_coalesce = mvneta_ethtool_set_coalesce, diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 6b8635378f1f..fa6d2354a0e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -81,8 +81,9 @@ void mlx4_cq_tasklet_cb(unsigned long data) static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) { - unsigned long flags; struct mlx4_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; + unsigned long flags; + bool kick; spin_lock_irqsave(&tasklet_ctx->lock, flags); /* When migrating CQs between EQs will be implemented, please note @@ -92,7 +93,10 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq) */ if (list_empty_careful(&cq->tasklet_ctx.list)) { atomic_inc(&cq->refcount); + kick = list_empty(&tasklet_ctx->list); list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list); + if (kick) + tasklet_schedule(&tasklet_ctx->task); } spin_unlock_irqrestore(&tasklet_ctx->lock, flags); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 504461a464c5..e7b81a305469 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -62,12 +62,13 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev, struct skb_shared_hwtstamps *hwts, u64 timestamp) { - unsigned long flags; + unsigned int seq; u64 nsec; - read_lock_irqsave(&mdev->clock_lock, flags); - nsec = timecounter_cyc2time(&mdev->clock, timestamp); - read_unlock_irqrestore(&mdev->clock_lock, flags); + do { + seq = read_seqbegin(&mdev->clock_lock); + nsec = timecounter_cyc2time(&mdev->clock, timestamp); + } while (read_seqretry(&mdev->clock_lock, seq)); memset(hwts, 0, sizeof(struct skb_shared_hwtstamps)); hwts->hwtstamp = ns_to_ktime(nsec); @@ -95,9 +96,9 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev) unsigned long flags; if (timeout) { - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); mdev->last_overflow_check = jiffies; } } @@ -128,10 +129,10 @@ static int mlx4_en_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_read(&mdev->clock); mdev->cycles.mult = neg_adj ? mult - diff : mult + diff; - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -149,9 +150,9 @@ static int mlx4_en_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) ptp_clock_info); unsigned long flags; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_adjtime(&mdev->clock, delta); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -172,9 +173,9 @@ static int mlx4_en_phc_gettime(struct ptp_clock_info *ptp, unsigned long flags; u64 ns; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); ns = timecounter_read(&mdev->clock); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); *ts = ns_to_timespec64(ns); @@ -198,9 +199,9 @@ static int mlx4_en_phc_settime(struct ptp_clock_info *ptp, unsigned long flags; /* reset the timecounter */ - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ns); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); return 0; } @@ -266,7 +267,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (mdev->ptp_clock) return; - rwlock_init(&mdev->clock_lock); + seqlock_init(&mdev->clock_lock); memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; @@ -276,10 +277,10 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; - write_lock_irqsave(&mdev->clock_lock, flags); + write_seqlock_irqsave(&mdev->clock_lock, flags); timecounter_init(&mdev->clock, &mdev->cycles, ktime_to_ns(ktime_get_real())); - write_unlock_irqrestore(&mdev->clock_lock, flags); + write_sequnlock_irqrestore(&mdev->clock_lock, flags); /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 0509996957d9..39232b6a974f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -494,7 +494,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_eqe *eqe; - int cqn = -1; + int cqn; int eqes_found = 0; int set_ci = 0; int port; @@ -840,13 +840,6 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq_set_ci(eq, 1); - /* cqn is 24bit wide but is initialized such that its higher bits - * are ones too. Thus, if we got any event, cqn's high bits should be off - * and we need to schedule the tasklet. - */ - if (!(cqn & ~0xffffff)) - tasklet_schedule(&eq->tasklet_ctx.task); - return eqes_found; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index cec59bc264c9..d8ca6d1794ef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -424,9 +424,9 @@ struct mlx4_en_dev { u32 priv_pdn; spinlock_t uar_lock; u8 mac_removed[MLX4_MAX_PORTS + 1]; - rwlock_t clock_lock; u32 nominal_c_mult; struct cyclecounter cycles; + seqlock_t clock_lock; struct timecounter clock; unsigned long last_overflow_check; unsigned long overflow_period; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d87a82682cb5..44406a5ec15d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1240,10 +1240,14 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + preempt_disable(); + tcf_exts_to_list(f->exts, &actions); list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); + preempt_enable(); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 42bb18feb316..5f337715a4da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -651,17 +651,16 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port) { struct mlxsw_afa_fwd_entry_ref *fwd_entry_ref; - u32 kvdl_index = 0; + u32 kvdl_index; char *act; int err; - if (!in_port) { - fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, - local_port); - if (IS_ERR(fwd_entry_ref)) - return PTR_ERR(fwd_entry_ref); - kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index; - } + if (in_port) + return -EOPNOTSUPP; + fwd_entry_ref = mlxsw_afa_fwd_entry_ref_create(block, local_port); + if (IS_ERR(fwd_entry_ref)) + return PTR_ERR(fwd_entry_ref); + kvdl_index = fwd_entry_ref->fwd_entry->kvdl_index; act = mlxsw_afa_block_append_action(block, MLXSW_AFA_FORWARD_CODE, MLXSW_AFA_FORWARD_SIZE); @@ -669,13 +668,12 @@ int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, err = -ENOBUFS; goto err_append_action; } - mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_OUTPUT, + mlxsw_afa_forward_pack(act, MLXSW_AFA_FORWARD_TYPE_PBS, kvdl_index, in_port); return 0; err_append_action: - if (!in_port) - mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); + mlxsw_afa_fwd_entry_ref_destroy(block, fwd_entry_ref); return err; } EXPORT_SYMBOL(mlxsw_afa_block_append_fwd); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 37c018bea5e2..16484f24b7db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3078,7 +3078,6 @@ static int __mlxsw_sp_flood_init(struct mlxsw_core *mlxsw_core, flood_table = MLXSW_SP_FLOOD_TABLE_UC; break; case MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4: - case MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6: flood_table = MLXSW_SP_FLOOD_TABLE_MC; break; default: diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 58ba5d3f9e5f..92367a06491a 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -4237,14 +4237,15 @@ static int nv_set_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo) return 0; } -static int nv_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int nv_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct fe_priv *np = netdev_priv(dev); - u32 speed; + u32 speed, supported, advertising; int adv; spin_lock_irq(&np->lock); - ecmd->port = PORT_MII; + cmd->base.port = PORT_MII; if (!netif_running(dev)) { /* We do not track link speed / duplex setting if the * interface is disabled. Force a link check */ @@ -4272,64 +4273,71 @@ static int nv_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) speed = -1; break; } - ecmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; if (np->duplex) - ecmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; } else { speed = SPEED_UNKNOWN; - ecmd->duplex = DUPLEX_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - ethtool_cmd_speed_set(ecmd, speed); - ecmd->autoneg = np->autoneg; + cmd->base.speed = speed; + cmd->base.autoneg = np->autoneg; - ecmd->advertising = ADVERTISED_MII; + advertising = ADVERTISED_MII; if (np->autoneg) { - ecmd->advertising |= ADVERTISED_Autoneg; + advertising |= ADVERTISED_Autoneg; adv = mii_rw(dev, np->phyaddr, MII_ADVERTISE, MII_READ); if (adv & ADVERTISE_10HALF) - ecmd->advertising |= ADVERTISED_10baseT_Half; + advertising |= ADVERTISED_10baseT_Half; if (adv & ADVERTISE_10FULL) - ecmd->advertising |= ADVERTISED_10baseT_Full; + advertising |= ADVERTISED_10baseT_Full; if (adv & ADVERTISE_100HALF) - ecmd->advertising |= ADVERTISED_100baseT_Half; + advertising |= ADVERTISED_100baseT_Half; if (adv & ADVERTISE_100FULL) - ecmd->advertising |= ADVERTISED_100baseT_Full; + advertising |= ADVERTISED_100baseT_Full; if (np->gigabit == PHY_GIGABIT) { adv = mii_rw(dev, np->phyaddr, MII_CTRL1000, MII_READ); if (adv & ADVERTISE_1000FULL) - ecmd->advertising |= ADVERTISED_1000baseT_Full; + advertising |= ADVERTISED_1000baseT_Full; } } - ecmd->supported = (SUPPORTED_Autoneg | + supported = (SUPPORTED_Autoneg | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_MII); if (np->gigabit == PHY_GIGABIT) - ecmd->supported |= SUPPORTED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; - ecmd->phy_address = np->phyaddr; - ecmd->transceiver = XCVR_EXTERNAL; + cmd->base.phy_address = np->phyaddr; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); /* ignore maxtxpkt, maxrxpkt for now */ spin_unlock_irq(&np->lock); return 0; } -static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +static int nv_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct fe_priv *np = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(ecmd); + u32 speed = cmd->base.speed; + u32 advertising; - if (ecmd->port != PORT_MII) - return -EINVAL; - if (ecmd->transceiver != XCVR_EXTERNAL) + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.port != PORT_MII) return -EINVAL; - if (ecmd->phy_address != np->phyaddr) { + if (cmd->base.phy_address != np->phyaddr) { /* TODO: support switching between multiple phys. Should be * trivial, but not enabled due to lack of test hardware. */ return -EINVAL; } - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { u32 mask; mask = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | @@ -4337,16 +4345,17 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) if (np->gigabit == PHY_GIGABIT) mask |= ADVERTISED_1000baseT_Full; - if ((ecmd->advertising & mask) == 0) + if ((advertising & mask) == 0) return -EINVAL; - } else if (ecmd->autoneg == AUTONEG_DISABLE) { + } else if (cmd->base.autoneg == AUTONEG_DISABLE) { /* Note: autonegotiation disable, speed 1000 intentionally * forbidden - no one should need that. */ if (speed != SPEED_10 && speed != SPEED_100) return -EINVAL; - if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL) + if (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else { return -EINVAL; @@ -4376,7 +4385,7 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) netif_tx_unlock_bh(dev); } - if (ecmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { int adv, bmcr; np->autoneg = 1; @@ -4384,13 +4393,13 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) /* advertise only what has been requested */ adv = mii_rw(dev, np->phyaddr, MII_ADVERTISE, MII_READ); adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); - if (ecmd->advertising & ADVERTISED_10baseT_Half) + if (advertising & ADVERTISED_10baseT_Half) adv |= ADVERTISE_10HALF; - if (ecmd->advertising & ADVERTISED_10baseT_Full) + if (advertising & ADVERTISED_10baseT_Full) adv |= ADVERTISE_10FULL; - if (ecmd->advertising & ADVERTISED_100baseT_Half) + if (advertising & ADVERTISED_100baseT_Half) adv |= ADVERTISE_100HALF; - if (ecmd->advertising & ADVERTISED_100baseT_Full) + if (advertising & ADVERTISED_100baseT_Full) adv |= ADVERTISE_100FULL; if (np->pause_flags & NV_PAUSEFRAME_RX_REQ) /* for rx we set both advertisements but disable tx pause */ adv |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; @@ -4401,7 +4410,7 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) if (np->gigabit == PHY_GIGABIT) { adv = mii_rw(dev, np->phyaddr, MII_CTRL1000, MII_READ); adv &= ~ADVERTISE_1000FULL; - if (ecmd->advertising & ADVERTISED_1000baseT_Full) + if (advertising & ADVERTISED_1000baseT_Full) adv |= ADVERTISE_1000FULL; mii_rw(dev, np->phyaddr, MII_CTRL1000, adv); } @@ -4428,13 +4437,13 @@ static int nv_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) adv = mii_rw(dev, np->phyaddr, MII_ADVERTISE, MII_READ); adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); - if (speed == SPEED_10 && ecmd->duplex == DUPLEX_HALF) + if (speed == SPEED_10 && cmd->base.duplex == DUPLEX_HALF) adv |= ADVERTISE_10HALF; - if (speed == SPEED_10 && ecmd->duplex == DUPLEX_FULL) + if (speed == SPEED_10 && cmd->base.duplex == DUPLEX_FULL) adv |= ADVERTISE_10FULL; - if (speed == SPEED_100 && ecmd->duplex == DUPLEX_HALF) + if (speed == SPEED_100 && cmd->base.duplex == DUPLEX_HALF) adv |= ADVERTISE_100HALF; - if (speed == SPEED_100 && ecmd->duplex == DUPLEX_FULL) + if (speed == SPEED_100 && cmd->base.duplex == DUPLEX_FULL) adv |= ADVERTISE_100FULL; np->pause_flags &= ~(NV_PAUSEFRAME_AUTONEG|NV_PAUSEFRAME_RX_ENABLE|NV_PAUSEFRAME_TX_ENABLE); if (np->pause_flags & NV_PAUSEFRAME_RX_REQ) {/* for rx we set both advertisements but disable tx pause */ @@ -5241,8 +5250,6 @@ static const struct ethtool_ops ops = { .get_link = ethtool_op_get_link, .get_wol = nv_get_wol, .set_wol = nv_set_wol, - .get_settings = nv_get_settings, - .set_settings = nv_set_settings, .get_regs_len = nv_get_regs_len, .get_regs = nv_get_regs, .nway_reset = nv_nway_reset, @@ -5255,6 +5262,8 @@ static const struct ethtool_ops ops = { .get_sset_count = nv_get_sset_count, .self_test = nv_self_test, .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = nv_get_link_ksettings, + .set_link_ksettings = nv_set_link_ksettings, }; /* The mgmt unit and driver use a semaphore to access the phy during init */ diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c index b19be7c6c1f4..21093276d2b7 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c @@ -73,62 +73,80 @@ static const struct pch_gbe_stats pch_gbe_gstrings_stats[] = { #define PCH_GBE_MAC_REGS_LEN (sizeof(struct pch_gbe_regs) / 4) #define PCH_GBE_REGS_LEN (PCH_GBE_MAC_REGS_LEN + PCH_GBE_PHY_REGS_LEN) /** - * pch_gbe_get_settings - Get device-specific settings + * pch_gbe_get_link_ksettings - Get device-specific settings * @netdev: Network interface device structure * @ecmd: Ethtool command * Returns: * 0: Successful. * Negative value: Failed. */ -static int pch_gbe_get_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int pch_gbe_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ecmd) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); + u32 supported, advertising; int ret; - ret = mii_ethtool_gset(&adapter->mii, ecmd); - ecmd->supported &= ~(SUPPORTED_TP | SUPPORTED_1000baseT_Half); - ecmd->advertising &= ~(ADVERTISED_TP | ADVERTISED_1000baseT_Half); + ret = mii_ethtool_get_link_ksettings(&adapter->mii, ecmd); + + ethtool_convert_link_mode_to_legacy_u32(&supported, + ecmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + ecmd->link_modes.advertising); + + supported &= ~(SUPPORTED_TP | SUPPORTED_1000baseT_Half); + advertising &= ~(ADVERTISED_TP | ADVERTISED_1000baseT_Half); + + ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(ecmd->link_modes.advertising, + advertising); if (!netif_carrier_ok(adapter->netdev)) - ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN); + ecmd->base.speed = SPEED_UNKNOWN; return ret; } /** - * pch_gbe_set_settings - Set device-specific settings + * pch_gbe_set_link_ksettings - Set device-specific settings * @netdev: Network interface device structure * @ecmd: Ethtool command * Returns: * 0: Successful. * Negative value: Failed. */ -static int pch_gbe_set_settings(struct net_device *netdev, - struct ethtool_cmd *ecmd) +static int pch_gbe_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ecmd) { struct pch_gbe_adapter *adapter = netdev_priv(netdev); struct pch_gbe_hw *hw = &adapter->hw; - u32 speed = ethtool_cmd_speed(ecmd); + struct ethtool_link_ksettings copy_ecmd; + u32 speed = ecmd->base.speed; + u32 advertising; int ret; pch_gbe_hal_write_phy_reg(hw, MII_BMCR, BMCR_RESET); + memcpy(©_ecmd, ecmd, sizeof(*ecmd)); + /* when set_settings() is called with a ethtool_cmd previously * filled by get_settings() on a down link, speed is -1: */ if (speed == UINT_MAX) { speed = SPEED_1000; - ethtool_cmd_speed_set(ecmd, speed); - ecmd->duplex = DUPLEX_FULL; + copy_ecmd.base.speed = speed; + copy_ecmd.base.duplex = DUPLEX_FULL; } - ret = mii_ethtool_sset(&adapter->mii, ecmd); + ret = mii_ethtool_set_link_ksettings(&adapter->mii, ©_ecmd); if (ret) { - netdev_err(netdev, "Error: mii_ethtool_sset\n"); + netdev_err(netdev, "Error: mii_ethtool_set_link_ksettings\n"); return ret; } hw->mac.link_speed = speed; - hw->mac.link_duplex = ecmd->duplex; - hw->phy.autoneg_advertised = ecmd->advertising; - hw->mac.autoneg = ecmd->autoneg; + hw->mac.link_duplex = copy_ecmd.base.duplex; + ethtool_convert_link_mode_to_legacy_u32( + &advertising, copy_ecmd.link_modes.advertising); + hw->phy.autoneg_advertised = advertising; + hw->mac.autoneg = copy_ecmd.base.autoneg; /* reset the link */ if (netif_running(adapter->netdev)) { @@ -487,8 +505,6 @@ static int pch_gbe_get_sset_count(struct net_device *netdev, int sset) } static const struct ethtool_ops pch_gbe_ethtool_ops = { - .get_settings = pch_gbe_get_settings, - .set_settings = pch_gbe_set_settings, .get_drvinfo = pch_gbe_get_drvinfo, .get_regs_len = pch_gbe_get_regs_len, .get_regs = pch_gbe_get_regs, @@ -503,6 +519,8 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = { .get_strings = pch_gbe_get_strings, .get_ethtool_stats = pch_gbe_get_ethtool_stats, .get_sset_count = pch_gbe_get_sset_count, + .get_link_ksettings = pch_gbe_get_link_ksettings, + .set_link_ksettings = pch_gbe_set_link_ksettings, }; void pch_gbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index f9e4e8eca665..5ae9681a2da7 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2149,17 +2149,6 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } /** - * pch_gbe_get_stats - Get System Network Statistics - * @netdev: Network interface device structure - * Returns: The current stats - */ -static struct net_device_stats *pch_gbe_get_stats(struct net_device *netdev) -{ - /* only return the current stats */ - return &netdev->stats; -} - -/** * pch_gbe_set_multi - Multicast and Promiscuous mode set * @netdev: Network interface device structure */ @@ -2420,7 +2409,6 @@ static const struct net_device_ops pch_gbe_netdev_ops = { .ndo_open = pch_gbe_open, .ndo_stop = pch_gbe_stop, .ndo_start_xmit = pch_gbe_xmit_frame, - .ndo_get_stats = pch_gbe_get_stats, .ndo_set_mac_address = pch_gbe_set_mac, .ndo_tx_timeout = pch_gbe_tx_timeout, .ndo_change_mtu = pch_gbe_change_mtu, diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 3cfd10503446..aaa1e8517348 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -104,6 +104,7 @@ config QED_SRIOV config QEDE tristate "QLogic QED 25/40/100Gb Ethernet NIC" depends on QED + imply PTP_1588_CLOCK ---help--- This enables the support for ... diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index 729e43768e99..1a7300f72cab 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ - qed_selftest.o qed_dcbx.o qed_debug.o + qed_selftest.o qed_dcbx.o qed_debug.o qed_ptp.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o qed-$(CONFIG_QED_RDMA) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 1f61cf3209e8..6557f94b92ed 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -456,6 +456,8 @@ struct qed_hwfn { u8 dcbx_no_edpm; u8 db_bar_no_edpm; + /* p_ptp_ptt is valid for leading HWFN only */ + struct qed_ptt *p_ptp_ptt; struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7520eb34ad00..df932be5a4e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -214,6 +214,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->vport_id = abs_vport_id; p_ramrod->mtu = cpu_to_le16(p_params->mtu); + p_ramrod->handle_ptp_pkts = p_params->handle_ptp_pkts; p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan; p_ramrod->drop_ttl0_en = p_params->drop_ttl0; p_ramrod->untagged = p_params->only_untagged; @@ -1886,6 +1887,7 @@ static int qed_start_vport(struct qed_dev *cdev, start.drop_ttl0 = params->drop_ttl0; start.opaque_fid = p_hwfn->hw_info.opaque_fid; start.concrete_fid = p_hwfn->hw_info.concrete_fid; + start.handle_ptp_pkts = params->handle_ptp_pkts; start.vport_id = params->vport_id; start.max_buffers_per_cqe = 16; start.mtu = params->mtu; @@ -2328,6 +2330,8 @@ extern const struct qed_iov_hv_ops qed_iov_ops_pass; extern const struct qed_eth_dcbnl_ops qed_dcbnl_ops_pass; #endif +extern const struct qed_eth_ptp_ops qed_ptp_ops_pass; + static const struct qed_eth_ops qed_eth_ops_pass = { .common = &qed_common_ops_pass, #ifdef CONFIG_QED_SRIOV @@ -2336,6 +2340,7 @@ static const struct qed_eth_ops qed_eth_ops_pass = { #ifdef CONFIG_DCB .dcb = &qed_dcbnl_ops_pass, #endif + .ptp = &qed_ptp_ops_pass, .fill_dev_info = &qed_fill_eth_dev_info, .register_ops = &qed_register_eth_ops, .check_mac = &qed_check_mac, diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 93cb932ef663..e763abd334f6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -156,6 +156,7 @@ struct qed_sp_vport_start_params { enum qed_tpa_mode tpa_mode; bool remove_inner_vlan; bool tx_switching; + bool handle_ptp_pkts; bool only_untagged; bool drop_ttl0; u8 max_buffers_per_cqe; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 93eee83ccdc3..592e104687a7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -902,6 +902,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, struct qed_mcp_drv_version drv_version; const u8 *data = NULL; struct qed_hwfn *hwfn; + struct qed_ptt *p_ptt; int rc = -EINVAL; if (qed_iov_wq_start(cdev)) @@ -916,6 +917,14 @@ static int qed_slowpath_start(struct qed_dev *cdev, QED_FW_FILE_NAME); goto err; } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (p_ptt) { + QED_LEADING_HWFN(cdev)->p_ptp_ptt = p_ptt; + } else { + DP_NOTICE(cdev, "Failed to acquire PTT for PTP\n"); + goto err; + } } cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS; @@ -1003,6 +1012,10 @@ err: if (IS_PF(cdev)) release_firmware(cdev->firmware); + if (IS_PF(cdev) && QED_LEADING_HWFN(cdev)->p_ptp_ptt) + qed_ptt_release(QED_LEADING_HWFN(cdev), + QED_LEADING_HWFN(cdev)->p_ptp_ptt); + qed_iov_wq_stop(cdev, false); return rc; @@ -1016,6 +1029,8 @@ static int qed_slowpath_stop(struct qed_dev *cdev) qed_ll2_dealloc_if(cdev); if (IS_PF(cdev)) { + qed_ptt_release(QED_LEADING_HWFN(cdev), + QED_LEADING_HWFN(cdev)->p_ptp_ptt); qed_free_stream_mem(cdev); if (IS_QED_ETH_IF(cdev)) qed_sriov_disable(cdev, true); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c new file mode 100644 index 000000000000..d27aa85da23c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -0,0 +1,323 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/types.h> +#include "qed.h" +#include "qed_dev_api.h" +#include "qed_hw.h" +#include "qed_l2.h" +#include "qed_ptp.h" +#include "qed_reg_addr.h" + +/* 16 nano second time quantas to wait before making a Drift adjustment */ +#define QED_DRIFT_CNTR_TIME_QUANTA_SHIFT 0 +/* Nano seconds to add/subtract when making a Drift adjustment */ +#define QED_DRIFT_CNTR_ADJUSTMENT_SHIFT 28 +/* Add/subtract the Adjustment_Value when making a Drift adjustment */ +#define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 +#define QED_TIMESTAMP_MASK BIT(16) + +/* Read Rx timestamp */ +static int qed_ptp_hw_read_rx_ts(struct qed_dev *cdev, u64 *timestamp) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + u32 val; + + *timestamp = 0; + val = qed_rd(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_SEQID); + if (!(val & QED_TIMESTAMP_MASK)) { + DP_INFO(p_hwfn, "Invalid Rx timestamp, buf_seqid = %d\n", val); + return -EINVAL; + } + + val = qed_rd(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_TS_LSB); + *timestamp = qed_rd(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_TS_MSB); + *timestamp <<= 32; + *timestamp |= val; + + /* Reset timestamp register to allow new timestamp */ + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_SEQID, + QED_TIMESTAMP_MASK); + + return 0; +} + +/* Read Tx timestamp */ +static int qed_ptp_hw_read_tx_ts(struct qed_dev *cdev, u64 *timestamp) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + u32 val; + + *timestamp = 0; + val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID); + if (!(val & QED_TIMESTAMP_MASK)) { + DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val); + return -EINVAL; + } + + val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_TS_LSB); + *timestamp = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_TS_MSB); + *timestamp <<= 32; + *timestamp |= val; + + /* Reset timestamp register to allow new timestamp */ + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID, QED_TIMESTAMP_MASK); + + return 0; +} + +/* Read Phy Hardware Clock */ +static int qed_ptp_hw_read_cc(struct qed_dev *cdev, u64 *phc_cycles) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + u32 temp = 0; + + temp = qed_rd(p_hwfn, p_ptt, NIG_REG_TSGEN_SYNC_TIME_LSB); + *phc_cycles = qed_rd(p_hwfn, p_ptt, NIG_REG_TSGEN_SYNC_TIME_MSB); + *phc_cycles <<= 32; + *phc_cycles |= temp; + + return 0; +} + +/* Filter PTP protocol packets that need to be timestamped */ +static int qed_ptp_hw_cfg_rx_filters(struct qed_dev *cdev, + enum qed_ptp_filter_type type) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + u32 rule_mask, parm_mask; + + switch (type) { + case QED_PTP_FILTER_L2_IPV4_IPV6: + parm_mask = 0x6AA; + rule_mask = 0x3EEE; + break; + case QED_PTP_FILTER_L2: + parm_mask = 0x6BF; + rule_mask = 0x3EFF; + break; + case QED_PTP_FILTER_IPV4_IPV6: + parm_mask = 0x7EA; + rule_mask = 0x3FFE; + break; + case QED_PTP_FILTER_IPV4: + parm_mask = 0x7EE; + rule_mask = 0x3FFE; + break; + default: + DP_INFO(p_hwfn, "Invalid PTP filter type %d\n", type); + return -EINVAL; + } + + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, parm_mask); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, rule_mask); + + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_TO_HOST, 0x1); + + /* Reset possibly old timestamps */ + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_SEQID, + QED_TIMESTAMP_MASK); + + return 0; +} + +/* Adjust the HW clock by a rate given in parts-per-billion (ppb) units. + * FW/HW accepts the adjustment value in terms of 3 parameters: + * Drift period - adjustment happens once in certain number of nano seconds. + * Drift value - time is adjusted by a certain value, for example by 5 ns. + * Drift direction - add or subtract the adjustment value. + * The routine translates ppb into the adjustment triplet in an optimal manner. + */ +static int qed_ptp_hw_adjfreq(struct qed_dev *cdev, s32 ppb) +{ + s64 best_val = 0, val, best_period = 0, period, approx_dev, dif, dif2; + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + u32 drift_ctr_cfg = 0, drift_state; + int drift_dir = 1; + + if (ppb < 0) { + ppb = -ppb; + drift_dir = 0; + } + + if (ppb > 1) { + s64 best_dif = ppb, best_approx_dev = 1; + + /* Adjustment value is up to +/-7ns, find an optimal value in + * this range. + */ + for (val = 7; val > 0; val--) { + period = div_s64(val * 1000000000, ppb); + period -= 8; + period >>= 4; + if (period < 1) + period = 1; + if (period > 0xFFFFFFE) + period = 0xFFFFFFE; + + /* Check both rounding ends for approximate error */ + approx_dev = period * 16 + 8; + dif = ppb * approx_dev - val * 1000000000; + dif2 = dif + 16 * ppb; + + if (dif < 0) + dif = -dif; + if (dif2 < 0) + dif2 = -dif2; + + /* Determine which end gives better approximation */ + if (dif * (approx_dev + 16) > dif2 * approx_dev) { + period++; + approx_dev += 16; + dif = dif2; + } + + /* Track best approximation found so far */ + if (best_dif * approx_dev > dif * best_approx_dev) { + best_dif = dif; + best_val = val; + best_period = period; + best_approx_dev = approx_dev; + } + } + } else if (ppb == 1) { + /* This is a special case as its the only value which wouldn't + * fit in a s64 variable. In order to prevent castings simple + * handle it seperately. + */ + best_val = 4; + best_period = 0xee6b27f; + } else { + best_val = 0; + best_period = 0xFFFFFFF; + } + + drift_ctr_cfg = (best_period << QED_DRIFT_CNTR_TIME_QUANTA_SHIFT) | + (((int)best_val) << QED_DRIFT_CNTR_ADJUSTMENT_SHIFT) | + (((int)drift_dir) << QED_DRIFT_CNTR_DIRECTION_SHIFT); + + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_RST_DRIFT_CNTR, 0x1); + + drift_state = qed_rd(p_hwfn, p_ptt, NIG_REG_TSGEN_RST_DRIFT_CNTR); + if (drift_state & 1) { + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_DRIFT_CNTR_CONF, + drift_ctr_cfg); + } else { + DP_INFO(p_hwfn, "Drift counter is not reset\n"); + return -EINVAL; + } + + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_RST_DRIFT_CNTR, 0x0); + + return 0; +} + +static int qed_ptp_hw_enable(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + + /* Reset PTP event detection rules - will be configured in the IOCTL */ + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0x7FF); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, 0x3FFF); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0x7FF); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF); + + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, 7); + qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, 7); + + qed_wr(p_hwfn, p_ptt, NIG_REG_TS_OUTPUT_ENABLE_PDA, 0x1); + + /* Pause free running counter */ + qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 2); + + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREE_CNT_VALUE_LSB, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_FREE_CNT_VALUE_MSB, 0); + /* Resume free running counter */ + qed_wr(p_hwfn, p_ptt, NIG_REG_TIMESYNC_GEN_REG_BB, 4); + + /* Disable drift register */ + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_DRIFT_CNTR_CONF, 0x0); + qed_wr(p_hwfn, p_ptt, NIG_REG_TSGEN_RST_DRIFT_CNTR, 0x0); + + /* Reset possibly old timestamps */ + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_HOST_BUF_SEQID, + QED_TIMESTAMP_MASK); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID, QED_TIMESTAMP_MASK); + + return 0; +} + +static int qed_ptp_hw_hwtstamp_tx_on(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0x6AA); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3EEE); + + return 0; +} + +static int qed_ptp_hw_disable(struct qed_dev *cdev) +{ + struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt = p_hwfn->p_ptp_ptt; + + /* Reset PTP event detection rules */ + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0x7FF); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, 0x3FFF); + + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0x7FF); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF); + + /* Disable the PTP feature */ + qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, 0x0); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, 0x0); + + return 0; +} + +const struct qed_eth_ptp_ops qed_ptp_ops_pass = { + .hwtstamp_tx_on = qed_ptp_hw_hwtstamp_tx_on, + .cfg_rx_filters = qed_ptp_hw_cfg_rx_filters, + .read_rx_ts = qed_ptp_hw_read_rx_ts, + .read_tx_ts = qed_ptp_hw_read_tx_ts, + .read_cc = qed_ptp_hw_read_cc, + .adjfreq = qed_ptp_hw_adjfreq, + .disable = qed_ptp_hw_disable, + .enable = qed_ptp_hw_enable, +}; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.h b/drivers/net/ethernet/qlogic/qed/qed_ptp.h new file mode 100644 index 000000000000..63c666d0b739 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.h @@ -0,0 +1,47 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_PTP_H +#define _QED_PTP_H +#include <linux/types.h> + +int qed_ptp_hwtstamp_tx_on(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +int qed_ptp_cfg_rx_filters(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_ptp_filter_type type); +int qed_ptp_read_rx_ts(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u64 *ts); +int qed_ptp_read_tx_ts(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u64 *ts); +int qed_ptp_read_cc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u64 *cycles); +int qed_ptp_adjfreq(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, s32 ppb); +int qed_ptp_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +int qed_ptp_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index b6722c6ff761..3b7edf6ff234 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -1481,4 +1481,35 @@ #define DORQ_REG_PF_ICID_BIT_SHIFT_NORM 0x100448UL #define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL #define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL +#define NIG_REG_RX_PTP_EN 0x501900UL +#define NIG_REG_TX_PTP_EN 0x501904UL +#define NIG_REG_LLH_PTP_TO_HOST 0x501908UL +#define NIG_REG_LLH_PTP_TO_MCP 0x50190cUL +#define NIG_REG_PTP_SW_TXTSEN 0x501910UL +#define NIG_REG_LLH_PTP_ETHERTYPE_1 0x501914UL +#define NIG_REG_LLH_PTP_MAC_DA_2_LSB 0x501918UL +#define NIG_REG_LLH_PTP_MAC_DA_2_MSB 0x50191cUL +#define NIG_REG_LLH_PTP_PARAM_MASK 0x501920UL +#define NIG_REG_LLH_PTP_RULE_MASK 0x501924UL +#define NIG_REG_TX_LLH_PTP_PARAM_MASK 0x501928UL +#define NIG_REG_TX_LLH_PTP_RULE_MASK 0x50192cUL +#define NIG_REG_LLH_PTP_HOST_BUF_SEQID 0x501930UL +#define NIG_REG_LLH_PTP_HOST_BUF_TS_LSB 0x501934UL +#define NIG_REG_LLH_PTP_HOST_BUF_TS_MSB 0x501938UL +#define NIG_REG_LLH_PTP_MCP_BUF_SEQID 0x50193cUL +#define NIG_REG_LLH_PTP_MCP_BUF_TS_LSB 0x501940UL +#define NIG_REG_LLH_PTP_MCP_BUF_TS_MSB 0x501944UL +#define NIG_REG_TX_LLH_PTP_BUF_SEQID 0x501948UL +#define NIG_REG_TX_LLH_PTP_BUF_TS_LSB 0x50194cUL +#define NIG_REG_TX_LLH_PTP_BUF_TS_MSB 0x501950UL +#define NIG_REG_RX_PTP_TS_MSB_ERR 0x501954UL +#define NIG_REG_TX_PTP_TS_MSB_ERR 0x501958UL +#define NIG_REG_TSGEN_SYNC_TIME_LSB 0x5088c0UL +#define NIG_REG_TSGEN_SYNC_TIME_MSB 0x5088c4UL +#define NIG_REG_TSGEN_RST_DRIFT_CNTR 0x5088d8UL +#define NIG_REG_TSGEN_DRIFT_CNTR_CONF 0x5088dcUL +#define NIG_REG_TS_OUTPUT_ENABLE_PDA 0x508870UL +#define NIG_REG_TIMESYNC_GEN_REG_BB 0x500d00UL +#define NIG_REG_TSGEN_FREE_CNT_VALUE_LSB 0x5088a8UL +#define NIG_REG_TSGEN_FREE_CNT_VALUE_MSB 0x5088acUL #endif diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 38fbee6a442b..bc5f7c3b277d 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_QEDE) := qede.o -qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o +qede-y := qede_main.o qede_fp.o qede_filter.o qede_ethtool.o qede_ptp.o qede-$(CONFIG_DCB) += qede_dcbnl.o qede-$(CONFIG_QED_RDMA) += qede_roce.o diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index b4234066689b..f2aaef2cfb86 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -137,6 +137,8 @@ struct qede_rdma_dev { struct workqueue_struct *roce_wq; }; +struct qede_ptp; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -148,8 +150,10 @@ struct qede_dev { u32 flags; #define QEDE_FLAG_IS_VF BIT(0) #define IS_VF(edev) (!!((edev)->flags & QEDE_FLAG_IS_VF)) +#define QEDE_TX_TIMESTAMPING_EN BIT(1) const struct qed_eth_ops *ops; + struct qede_ptp *ptp; struct qed_dev_eth_info dev_info; #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index baf264225c12..c02754ddc0e9 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -39,6 +39,7 @@ #include <linux/capability.h> #include <linux/vmalloc.h> #include "qede.h" +#include "qede_ptp.h" #define QEDE_RQSTAT_OFFSET(stat_name) \ (offsetof(struct qede_rx_queue, stat_name)) @@ -940,6 +941,14 @@ static int qede_set_channels(struct net_device *dev, return 0; } +static int qede_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct qede_dev *edev = netdev_priv(dev); + + return qede_ptp_get_ts_info(edev, info); +} + static int qede_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) { @@ -1586,6 +1595,7 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_rxfh_key_size = qede_get_rxfh_key_size, .get_rxfh = qede_get_rxfh, .set_rxfh = qede_set_rxfh, + .get_ts_info = qede_get_ts_info, .get_channels = qede_get_channels, .set_channels = qede_set_channels, .self_test = qede_self_test, diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 26848eed3bc1..1e65038c8fc0 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -40,6 +40,7 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <net/ip6_checksum.h> +#include "qede_ptp.h" #include <linux/qed/qed_if.h> #include "qede.h" @@ -1277,6 +1278,7 @@ static int qede_rx_process_cqe(struct qede_dev *edev, qede_get_rxhash(skb, fp_cqe->bitfields, fp_cqe->rss_hash); qede_set_skb_csum(skb, csum_flag); skb_record_rx_queue(skb, rxq->rxq_id); + qede_ptp_record_rx_ts(edev, cqe, skb); /* SKB is prepared - pass it to stack */ qede_skb_receive(edev, fp, rxq, skb, le16_to_cpu(fp_cqe->vlan_tag)); @@ -1451,6 +1453,9 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) first_bd->data.bd_flags.bitfields = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + qede_ptp_tx_ts(edev, skb); + /* Map skb linear data for DMA and set in the first BD */ mapping = dma_map_single(txq->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 40a76a1d5973..d163e72aa2a6 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -62,6 +62,7 @@ #include <linux/vmalloc.h> #include <linux/qed/qede_roce.h> #include "qede.h" +#include "qede_ptp.h" static char version[] = "QLogic FastLinQ 4xxxx Ethernet Driver qede " DRV_MODULE_VERSION "\n"; @@ -484,6 +485,25 @@ static int qede_set_vf_trust(struct net_device *dev, int vfidx, bool setting) } #endif +static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct qede_dev *edev = netdev_priv(dev); + + if (!netif_running(dev)) + return -EAGAIN; + + switch (cmd) { + case SIOCSHWTSTAMP: + return qede_ptp_hw_ts(edev, ifr); + default: + DP_VERBOSE(edev, QED_MSG_DEBUG, + "default IOCTL cmd 0x%x\n", cmd); + return -EOPNOTSUPP; + } + + return 0; +} + static const struct net_device_ops qede_netdev_ops = { .ndo_open = qede_open, .ndo_stop = qede_close, @@ -492,6 +512,7 @@ static const struct net_device_ops qede_netdev_ops = { .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = qede_change_mtu, + .ndo_do_ioctl = qede_ioctl, #ifdef CONFIG_QED_SRIOV .ndo_set_vf_mac = qede_set_vf_mac, .ndo_set_vf_vlan = qede_set_vf_vlan, @@ -841,6 +862,15 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION); + /* PTP not supported on VFs */ + if (!is_vf) { + rc = qede_ptp_register_phc(edev); + if (rc) { + DP_NOTICE(edev, "Cannot register PHC\n"); + goto err5; + } + } + edev->ops->register_ops(cdev, &qede_ll_ops, edev); #ifdef CONFIG_DCB @@ -856,6 +886,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; +err5: + unregister_netdev(edev->ndev); err4: qede_roce_dev_remove(edev); err3: @@ -907,6 +939,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) unregister_netdev(ndev); + qede_ptp_remove(edev); + qede_roce_dev_remove(edev); edev->ops->common->set_power_state(cdev, PCI_D0); @@ -1660,6 +1694,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) if (!vport_update_params) return -ENOMEM; + start.handle_ptp_pkts = !!(edev->ptp); start.gro_enable = !edev->gro_disable; start.mtu = edev->ndev->mtu; start.vport_id = 0; @@ -1781,6 +1816,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, qede_roce_dev_event_close(edev); edev->state = QEDE_STATE_CLOSED; + qede_ptp_stop(edev); + /* Close OS Tx */ netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev); @@ -1882,6 +1919,8 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, qede_roce_dev_event_open(edev); qede_link_update(edev, &link_output); + qede_ptp_start(edev, (mode == QEDE_LOAD_NORMAL)); + edev->state = QEDE_STATE_OPEN; DP_INFO(edev, "Ending successfully qede load\n"); diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c new file mode 100644 index 000000000000..2e62dec09bd7 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -0,0 +1,536 @@ +/* QLogic qede NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "qede_ptp.h" + +struct qede_ptp { + const struct qed_eth_ptp_ops *ops; + struct ptp_clock_info clock_info; + struct cyclecounter cc; + struct timecounter tc; + struct ptp_clock *clock; + struct work_struct work; + struct qede_dev *edev; + struct sk_buff *tx_skb; + + /* ptp spinlock is used for protecting the cycle/time counter fields + * and, also for serializing the qed PTP API invocations. + */ + spinlock_t lock; + bool hw_ts_ioctl_called; + u16 tx_type; + u16 rx_filter; +}; + +/** + * qede_ptp_adjfreq + * @ptp: the ptp clock structure + * @ppb: parts per billion adjustment from base + * + * Adjust the frequency of the ptp cycle counter by the + * indicated ppb from the base frequency. + */ +static int qede_ptp_adjfreq(struct ptp_clock_info *info, s32 ppb) +{ + struct qede_ptp *ptp = container_of(info, struct qede_ptp, clock_info); + struct qede_dev *edev = ptp->edev; + int rc; + + __qede_lock(edev); + if (edev->state == QEDE_STATE_OPEN) { + spin_lock_bh(&ptp->lock); + rc = ptp->ops->adjfreq(edev->cdev, ppb); + spin_unlock_bh(&ptp->lock); + } else { + DP_ERR(edev, "PTP adjfreq called while interface is down\n"); + rc = -EFAULT; + } + __qede_unlock(edev); + + return rc; +} + +static int qede_ptp_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct qede_dev *edev; + struct qede_ptp *ptp; + + ptp = container_of(info, struct qede_ptp, clock_info); + edev = ptp->edev; + + DP_VERBOSE(edev, QED_MSG_DEBUG, "PTP adjtime called, delta = %llx\n", + delta); + + spin_lock_bh(&ptp->lock); + timecounter_adjtime(&ptp->tc, delta); + spin_unlock_bh(&ptp->lock); + + return 0; +} + +static int qede_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts) +{ + struct qede_dev *edev; + struct qede_ptp *ptp; + u64 ns; + + ptp = container_of(info, struct qede_ptp, clock_info); + edev = ptp->edev; + + spin_lock_bh(&ptp->lock); + ns = timecounter_read(&ptp->tc); + spin_unlock_bh(&ptp->lock); + + DP_VERBOSE(edev, QED_MSG_DEBUG, "PTP gettime called, ns = %llu\n", ns); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int qede_ptp_settime(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + struct qede_dev *edev; + struct qede_ptp *ptp; + u64 ns; + + ptp = container_of(info, struct qede_ptp, clock_info); + edev = ptp->edev; + + ns = timespec64_to_ns(ts); + + DP_VERBOSE(edev, QED_MSG_DEBUG, "PTP settime called, ns = %llu\n", ns); + + /* Re-init the timecounter */ + spin_lock_bh(&ptp->lock); + timecounter_init(&ptp->tc, &ptp->cc, ns); + spin_unlock_bh(&ptp->lock); + + return 0; +} + +/* Enable (or disable) ancillary features of the phc subsystem */ +static int qede_ptp_ancillary_feature_enable(struct ptp_clock_info *info, + struct ptp_clock_request *rq, + int on) +{ + struct qede_dev *edev; + struct qede_ptp *ptp; + + ptp = container_of(info, struct qede_ptp, clock_info); + edev = ptp->edev; + + DP_ERR(edev, "PHC ancillary features are not supported\n"); + + return -ENOTSUPP; +} + +static void qede_ptp_task(struct work_struct *work) +{ + struct skb_shared_hwtstamps shhwtstamps; + struct qede_dev *edev; + struct qede_ptp *ptp; + u64 timestamp, ns; + int rc; + + ptp = container_of(work, struct qede_ptp, work); + edev = ptp->edev; + + /* Read Tx timestamp registers */ + spin_lock_bh(&ptp->lock); + rc = ptp->ops->read_tx_ts(edev->cdev, ×tamp); + spin_unlock_bh(&ptp->lock); + if (rc) { + /* Reschedule to keep checking for a valid timestamp value */ + schedule_work(&ptp->work); + return; + } + + ns = timecounter_cyc2time(&ptp->tc, timestamp); + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(ptp->tx_skb, &shhwtstamps); + dev_kfree_skb_any(ptp->tx_skb); + ptp->tx_skb = NULL; + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "Tx timestamp, timestamp cycles = %llu, ns = %llu\n", + timestamp, ns); +} + +/* Read the PHC. This API is invoked with ptp_lock held. */ +static u64 qede_ptp_read_cc(const struct cyclecounter *cc) +{ + struct qede_dev *edev; + struct qede_ptp *ptp; + u64 phc_cycles; + int rc; + + ptp = container_of(cc, struct qede_ptp, cc); + edev = ptp->edev; + rc = ptp->ops->read_cc(edev->cdev, &phc_cycles); + if (rc) + WARN_ONCE(1, "PHC read err %d\n", rc); + + DP_VERBOSE(edev, QED_MSG_DEBUG, "PHC read cycles = %llu\n", phc_cycles); + + return phc_cycles; +} + +static void qede_ptp_init_cc(struct qede_dev *edev) +{ + struct qede_ptp *ptp; + + ptp = edev->ptp; + if (!ptp) + return; + + memset(&ptp->cc, 0, sizeof(ptp->cc)); + ptp->cc.read = qede_ptp_read_cc; + ptp->cc.mask = CYCLECOUNTER_MASK(64); + ptp->cc.shift = 0; + ptp->cc.mult = 1; +} + +static int qede_ptp_cfg_filters(struct qede_dev *edev) +{ + struct qede_ptp *ptp = edev->ptp; + + if (!ptp) + return -EIO; + + if (!ptp->hw_ts_ioctl_called) { + DP_INFO(edev, "TS IOCTL not called\n"); + return 0; + } + + switch (ptp->tx_type) { + case HWTSTAMP_TX_ON: + edev->flags |= QEDE_TX_TIMESTAMPING_EN; + ptp->ops->hwtstamp_tx_on(edev->cdev); + break; + + case HWTSTAMP_TX_ONESTEP_SYNC: + DP_ERR(edev, "One-step timestamping is not supported\n"); + return -ERANGE; + } + + spin_lock_bh(&ptp->lock); + switch (ptp->rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + ptp->rx_filter = HWTSTAMP_FILTER_NONE; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + ptp->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + /* Initialize PTP detection for UDP/IPv4 events */ + ptp->ops->cfg_rx_filters(edev->cdev, QED_PTP_FILTER_IPV4); + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + /* Initialize PTP detection for UDP/IPv4 or UDP/IPv6 events */ + ptp->ops->cfg_rx_filters(edev->cdev, QED_PTP_FILTER_IPV4_IPV6); + break; + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + /* Initialize PTP detection L2 events */ + ptp->ops->cfg_rx_filters(edev->cdev, QED_PTP_FILTER_L2); + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ptp->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + /* Initialize PTP detection L2, UDP/IPv4 or UDP/IPv6 events */ + ptp->ops->cfg_rx_filters(edev->cdev, + QED_PTP_FILTER_L2_IPV4_IPV6); + break; + } + + spin_unlock_bh(&ptp->lock); + + return 0; +} + +int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *ifr) +{ + struct hwtstamp_config config; + struct qede_ptp *ptp; + int rc; + + ptp = edev->ptp; + if (!ptp) + return -EIO; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + DP_VERBOSE(edev, QED_MSG_DEBUG, + "HWTSTAMP IOCTL: Requested tx_type = %d, requested rx_filters = %d\n", + config.tx_type, config.rx_filter); + + if (config.flags) { + DP_ERR(edev, "config.flags is reserved for future use\n"); + return -EINVAL; + } + + ptp->hw_ts_ioctl_called = 1; + ptp->tx_type = config.tx_type; + ptp->rx_filter = config.rx_filter; + + rc = qede_ptp_cfg_filters(edev); + if (rc) + return rc; + + config.rx_filter = ptp->rx_filter; + + return copy_to_user(ifr->ifr_data, &config, + sizeof(config)) ? -EFAULT : 0; +} + +/* Called during load, to initialize PTP-related stuff */ +static void qede_ptp_init(struct qede_dev *edev, bool init_tc) +{ + struct qede_ptp *ptp; + int rc; + + ptp = edev->ptp; + if (!ptp) + return; + + spin_lock_init(&ptp->lock); + + /* Configure PTP in HW */ + rc = ptp->ops->enable(edev->cdev); + if (rc) { + DP_ERR(edev, "Stopping PTP initialization\n"); + return; + } + + /* Init work queue for Tx timestamping */ + INIT_WORK(&ptp->work, qede_ptp_task); + + /* Init cyclecounter and timecounter. This is done only in the first + * load. If done in every load, PTP application will fail when doing + * unload / load (e.g. MTU change) while it is running. + */ + if (init_tc) { + qede_ptp_init_cc(edev); + timecounter_init(&ptp->tc, &ptp->cc, + ktime_to_ns(ktime_get_real())); + } + + DP_VERBOSE(edev, QED_MSG_DEBUG, "PTP initialization is successful\n"); +} + +void qede_ptp_start(struct qede_dev *edev, bool init_tc) +{ + qede_ptp_init(edev, init_tc); + qede_ptp_cfg_filters(edev); +} + +void qede_ptp_remove(struct qede_dev *edev) +{ + struct qede_ptp *ptp; + + ptp = edev->ptp; + if (ptp && ptp->clock) { + ptp_clock_unregister(ptp->clock); + ptp->clock = NULL; + } + + kfree(ptp); + edev->ptp = NULL; +} + +int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info) +{ + struct qede_ptp *ptp = edev->ptp; + + if (!ptp) + return -EIO; + + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (ptp->clock) + info->phc_index = ptp_clock_index(ptp->clock); + else + info->phc_index = -1; + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); + + return 0; +} + +/* Called during unload, to stop PTP-related stuff */ +void qede_ptp_stop(struct qede_dev *edev) +{ + struct qede_ptp *ptp; + + ptp = edev->ptp; + if (!ptp) + return; + + /* Cancel PTP work queue. Should be done after the Tx queues are + * drained to prevent additional scheduling. + */ + cancel_work_sync(&ptp->work); + if (ptp->tx_skb) { + dev_kfree_skb_any(ptp->tx_skb); + ptp->tx_skb = NULL; + } + + /* Disable PTP in HW */ + spin_lock_bh(&ptp->lock); + ptp->ops->disable(edev->cdev); + spin_unlock_bh(&ptp->lock); +} + +int qede_ptp_register_phc(struct qede_dev *edev) +{ + struct qede_ptp *ptp; + + ptp = kzalloc(sizeof(*ptp), GFP_KERNEL); + if (!ptp) { + DP_INFO(edev, "Failed to allocate struct for PTP\n"); + return -ENOMEM; + } + + ptp->edev = edev; + ptp->ops = edev->ops->ptp; + if (!ptp->ops) { + kfree(ptp); + edev->ptp = NULL; + DP_ERR(edev, "PTP clock registeration failed\n"); + return -EIO; + } + + edev->ptp = ptp; + + /* Fill the ptp_clock_info struct and register PTP clock */ + ptp->clock_info.owner = THIS_MODULE; + snprintf(ptp->clock_info.name, 16, "%s", edev->ndev->name); + ptp->clock_info.max_adj = QED_MAX_PHC_DRIFT_PPB; + ptp->clock_info.n_alarm = 0; + ptp->clock_info.n_ext_ts = 0; + ptp->clock_info.n_per_out = 0; + ptp->clock_info.pps = 0; + ptp->clock_info.adjfreq = qede_ptp_adjfreq; + ptp->clock_info.adjtime = qede_ptp_adjtime; + ptp->clock_info.gettime64 = qede_ptp_gettime; + ptp->clock_info.settime64 = qede_ptp_settime; + ptp->clock_info.enable = qede_ptp_ancillary_feature_enable; + + ptp->clock = ptp_clock_register(&ptp->clock_info, &edev->pdev->dev); + if (IS_ERR(ptp->clock)) { + ptp->clock = NULL; + kfree(ptp); + edev->ptp = NULL; + DP_ERR(edev, "PTP clock registeration failed\n"); + } + + return 0; +} + +void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb) +{ + struct qede_ptp *ptp; + + ptp = edev->ptp; + if (!ptp) + return; + + if (unlikely(!(edev->flags & QEDE_TX_TIMESTAMPING_EN))) { + DP_NOTICE(edev, + "Tx timestamping was not enabled, this packet will not be timestamped\n"); + } else if (unlikely(ptp->tx_skb)) { + DP_NOTICE(edev, + "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + } else { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* schedule check for Tx timestamp */ + ptp->tx_skb = skb_get(skb); + schedule_work(&ptp->work); + } +} + +void qede_ptp_rx_ts(struct qede_dev *edev, struct sk_buff *skb) +{ + struct qede_ptp *ptp; + u64 timestamp, ns; + int rc; + + ptp = edev->ptp; + if (!ptp) + return; + + spin_lock_bh(&ptp->lock); + rc = ptp->ops->read_rx_ts(edev->cdev, ×tamp); + if (rc) { + spin_unlock_bh(&ptp->lock); + DP_INFO(edev, "Invalid Rx timestamp\n"); + return; + } + + ns = timecounter_cyc2time(&ptp->tc, timestamp); + spin_unlock_bh(&ptp->lock); + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns); + DP_VERBOSE(edev, QED_MSG_DEBUG, + "Rx timestamp, timestamp cycles = %llu, ns = %llu\n", + timestamp, ns); +} diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.h b/drivers/net/ethernet/qlogic/qede/qede_ptp.h new file mode 100644 index 000000000000..f328f9bba53a --- /dev/null +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.h @@ -0,0 +1,65 @@ +/* QLogic qede NIC Driver + * Copyright (c) 2015-2017 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QEDE_PTP_H_ +#define _QEDE_PTP_H_ + +#include <linux/ptp_clock_kernel.h> +#include <linux/net_tstamp.h> +#include <linux/timecounter.h> +#include "qede.h" + +void qede_ptp_rx_ts(struct qede_dev *edev, struct sk_buff *skb); +void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb); +int qede_ptp_hw_ts(struct qede_dev *edev, struct ifreq *req); +void qede_ptp_start(struct qede_dev *edev, bool init_tc); +void qede_ptp_stop(struct qede_dev *edev); +void qede_ptp_remove(struct qede_dev *edev); +int qede_ptp_register_phc(struct qede_dev *edev); +int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *ts); + +static inline void qede_ptp_record_rx_ts(struct qede_dev *edev, + union eth_rx_cqe *cqe, + struct sk_buff *skb) +{ + /* Check if this packet was timestamped */ + if (unlikely(le16_to_cpu(cqe->fast_path_regular.pars_flags.flags) & + (1 << PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_SHIFT))) { + if (likely(le16_to_cpu(cqe->fast_path_regular.pars_flags.flags) + & (1 << PARSING_AND_ERR_FLAGS_TIMESYNCPKT_SHIFT))) { + qede_ptp_rx_ts(edev, skb); + } else { + DP_INFO(edev, + "Timestamp recorded for non PTP packets\n"); + } + } +} +#endif /* _QEDE_PTP_H_ */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c index 0d9945fb79be..bbe24639aa5a 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c @@ -227,7 +227,7 @@ static void emac_get_regs(struct net_device *netdev, static int emac_get_regs_len(struct net_device *netdev) { - return EMAC_MAX_REG_SIZE * sizeof(32); + return EMAC_MAX_REG_SIZE * sizeof(u32); } static const struct ethtool_ops emac_ethtool_ops = { diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 761ccc64eee9..b29a819f721d 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -6456,6 +6456,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .rx_ts_offset = ES_DZ_RX_PREFIX_TSTAMP_OFST, .can_rx_scatter = true, .always_rx_scatter = true, + .min_interrupt_mode = EFX_INT_MODE_MSIX, .max_interrupt_mode = EFX_INT_MODE_MSIX, .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, .offload_features = EF10_OFFLOAD_FEATURES, @@ -6588,6 +6589,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .can_rx_scatter = true, .always_rx_scatter = true, .option_descriptors = true, + .min_interrupt_mode = EFX_INT_MODE_LEGACY, .max_interrupt_mode = EFX_INT_MODE_MSIX, .timer_period_max = 1 << ERF_DD_EVQ_IND_TIMER_VAL_WIDTH, .offload_features = EF10_OFFLOAD_FEATURES, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index cb8e2c3f806a..8c4c273643dc 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -1420,9 +1420,12 @@ static int efx_probe_interrupts(struct efx_nic *efx) xentries, 1, n_channels); if (rc < 0) { /* Fall back to single channel MSI */ - efx->interrupt_mode = EFX_INT_MODE_MSI; netif_err(efx, drv, efx->net_dev, "could not enable MSI-X\n"); + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) + efx->interrupt_mode = EFX_INT_MODE_MSI; + else + return rc; } else if (rc < n_channels) { netif_err(efx, drv, efx->net_dev, "WARNING: Insufficient MSI-X vectors" @@ -1465,7 +1468,10 @@ static int efx_probe_interrupts(struct efx_nic *efx) } else { netif_err(efx, drv, efx->net_dev, "could not enable MSI\n"); - efx->interrupt_mode = EFX_INT_MODE_LEGACY; + if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) + efx->interrupt_mode = EFX_INT_MODE_LEGACY; + else + return rc; } } @@ -2935,7 +2941,7 @@ static const struct efx_phy_operations efx_dummy_phy_operations = { static int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, struct net_device *net_dev) { - int i; + int rc = -ENOMEM, i; /* Initialise common structures */ INIT_LIST_HEAD(&efx->node); @@ -2976,8 +2982,15 @@ static int efx_init_struct(struct efx_nic *efx, } /* Higher numbered interrupt modes are less capable! */ + if (WARN_ON_ONCE(efx->type->max_interrupt_mode > + efx->type->min_interrupt_mode)) { + rc = -EIO; + goto fail; + } efx->interrupt_mode = max(efx->type->max_interrupt_mode, interrupt_mode); + efx->interrupt_mode = min(efx->type->min_interrupt_mode, + interrupt_mode); /* Would be good to use the net_dev name, but we're too early */ snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", @@ -2990,7 +3003,7 @@ static int efx_init_struct(struct efx_nic *efx, fail: efx_fini_struct(efx); - return -ENOMEM; + return rc; } static void efx_fini_struct(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 5cb8112b1cf3..c0537ea06c9a 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1142,8 +1142,10 @@ struct efx_udp_tunnel { * @can_rx_scatter: NIC is able to scatter packets to multiple buffers * @always_rx_scatter: NIC will always scatter packets to multiple buffers * @option_descriptors: NIC supports TX option descriptors + * @min_interrupt_mode: Lowest capability interrupt mode supported + * from &enum efx_int_mode. * @max_interrupt_mode: Highest capability interrupt mode supported - * from &enum efx_init_mode. + * from &enum efx_int_mode. * @timer_period_max: Maximum period of interrupt timer (in ticks) * @offload_features: net_device feature flags for protocol offload * features implemented in hardware @@ -1306,6 +1308,7 @@ struct efx_nic_type { bool can_rx_scatter; bool always_rx_scatter; bool option_descriptors; + unsigned int min_interrupt_mode; unsigned int max_interrupt_mode; unsigned int timer_period_max; netdev_features_t offload_features; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index b1d36df71ecb..a617f657eae3 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -1069,6 +1069,7 @@ const struct efx_nic_type siena_a0_nic_type = { .rx_buffer_padding = 0, .can_rx_scatter = true, .option_descriptors = false, + .min_interrupt_mode = EFX_INT_MODE_LEGACY, .max_interrupt_mode = EFX_INT_MODE_MSIX, .timer_period_max = 1 << FRF_CZ_TC_TIMER_VAL_WIDTH, .offload_features = (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index aab895d9257e..5ff6bc4eb8f1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -442,24 +442,24 @@ static void stmmac_ethtool_gregs(struct net_device *dev, memset(reg_space, 0x0, REG_SPACE_SIZE); - if (!(priv->plat->has_gmac || priv->plat->has_gmac4)) { + if (priv->plat->has_gmac || priv->plat->has_gmac4) { /* MAC registers */ - for (i = 0; i < 12; i++) + for (i = 0; i < 55; i++) reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ - for (i = 0; i < 9; i++) - reg_space[i + 12] = + for (i = 0; i < 22; i++) + reg_space[i + 55] = readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); - reg_space[22] = readl(priv->ioaddr + DMA_CUR_TX_BUF_ADDR); - reg_space[23] = readl(priv->ioaddr + DMA_CUR_RX_BUF_ADDR); } else { /* MAC registers */ - for (i = 0; i < 55; i++) + for (i = 0; i < 12; i++) reg_space[i] = readl(priv->ioaddr + (i * 4)); /* DMA registers */ - for (i = 0; i < 22; i++) - reg_space[i + 55] = + for (i = 0; i < 9; i++) + reg_space[i + 12] = readl(priv->ioaddr + (DMA_BUS_MODE + (i * 4))); + reg_space[22] = readl(priv->ioaddr + DMA_CUR_TX_BUF_ADDR); + reg_space[23] = readl(priv->ioaddr + DMA_CUR_RX_BUF_ADDR); } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 7251871eaa32..3cbe09682afe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -716,32 +716,36 @@ static void stmmac_adjust_link(struct net_device *dev) new_state = 1; switch (phydev->speed) { case 1000: - if (likely((priv->plat->has_gmac) || - (priv->plat->has_gmac4))) + if (priv->plat->has_gmac || + priv->plat->has_gmac4) ctrl &= ~priv->hw->link.port; - stmmac_hw_fix_mac_speed(priv); break; case 100: + if (priv->plat->has_gmac || + priv->plat->has_gmac4) { + ctrl |= priv->hw->link.port; + ctrl |= priv->hw->link.speed; + } else { + ctrl &= ~priv->hw->link.port; + } + break; case 10: - if (likely((priv->plat->has_gmac) || - (priv->plat->has_gmac4))) { + if (priv->plat->has_gmac || + priv->plat->has_gmac4) { ctrl |= priv->hw->link.port; - if (phydev->speed == SPEED_100) { - ctrl |= priv->hw->link.speed; - } else { - ctrl &= ~(priv->hw->link.speed); - } + ctrl &= ~(priv->hw->link.speed); } else { ctrl &= ~priv->hw->link.port; } - stmmac_hw_fix_mac_speed(priv); break; default: netif_warn(priv, link, priv->dev, "broken speed: %d\n", phydev->speed); + phydev->speed = SPEED_UNKNOWN; break; } - + if (phydev->speed != SPEED_UNKNOWN) + stmmac_hw_fix_mac_speed(priv); priv->speed = phydev->speed; } @@ -754,8 +758,8 @@ static void stmmac_adjust_link(struct net_device *dev) } else if (priv->oldlink) { new_state = 1; priv->oldlink = 0; - priv->speed = 0; - priv->oldduplex = -1; + priv->speed = SPEED_UNKNOWN; + priv->oldduplex = DUPLEX_UNKNOWN; } if (new_state && netif_msg_link(priv)) @@ -817,8 +821,8 @@ static int stmmac_init_phy(struct net_device *dev) int interface = priv->plat->interface; int max_speed = priv->plat->max_speed; priv->oldlink = 0; - priv->speed = 0; - priv->oldduplex = -1; + priv->speed = SPEED_UNKNOWN; + priv->oldduplex = DUPLEX_UNKNOWN; if (priv->plat->phy_node) { phydev = of_phy_connect(dev, priv->plat->phy_node, @@ -3434,8 +3438,8 @@ int stmmac_suspend(struct device *dev) spin_unlock_irqrestore(&priv->lock, flags); priv->oldlink = 0; - priv->speed = 0; - priv->oldduplex = -1; + priv->speed = SPEED_UNKNOWN; + priv->oldduplex = DUPLEX_UNKNOWN; return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index d9893cf05695..db157a47000c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -247,50 +247,48 @@ int stmmac_mdio_register(struct net_device *ndev) found = 0; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { struct phy_device *phydev = mdiobus_get_phy(new_bus, addr); + int act = 0; + char irq_num[4]; + char *irq_str; + + if (!phydev) + continue; + + /* + * If an IRQ was provided to be assigned after + * the bus probe, do it here. + */ + if (!mdio_bus_data->irqs && + (mdio_bus_data->probed_phy_irq > 0)) { + new_bus->irq[addr] = mdio_bus_data->probed_phy_irq; + phydev->irq = mdio_bus_data->probed_phy_irq; + } - if (phydev) { - int act = 0; - char irq_num[4]; - char *irq_str; - - /* - * If an IRQ was provided to be assigned after - * the bus probe, do it here. - */ - if ((!mdio_bus_data->irqs) && - (mdio_bus_data->probed_phy_irq > 0)) { - new_bus->irq[addr] = - mdio_bus_data->probed_phy_irq; - phydev->irq = mdio_bus_data->probed_phy_irq; - } - - /* - * If we're going to bind the MAC to this PHY bus, - * and no PHY number was provided to the MAC, - * use the one probed here. - */ - if (priv->plat->phy_addr == -1) - priv->plat->phy_addr = addr; - - act = (priv->plat->phy_addr == addr); - switch (phydev->irq) { - case PHY_POLL: - irq_str = "POLL"; - break; - case PHY_IGNORE_INTERRUPT: - irq_str = "IGNORE"; - break; - default: - sprintf(irq_num, "%d", phydev->irq); - irq_str = irq_num; - break; - } - netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", - phydev->phy_id, addr, - irq_str, phydev_name(phydev), - act ? " active" : ""); - found = 1; + /* + * If we're going to bind the MAC to this PHY bus, + * and no PHY number was provided to the MAC, + * use the one probed here. + */ + if (priv->plat->phy_addr == -1) + priv->plat->phy_addr = addr; + + act = (priv->plat->phy_addr == addr); + switch (phydev->irq) { + case PHY_POLL: + irq_str = "POLL"; + break; + case PHY_IGNORE_INTERRUPT: + irq_str = "IGNORE"; + break; + default: + sprintf(irq_num, "%d", phydev->irq); + irq_str = irq_num; + break; } + netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", + phydev->phy_id, addr, irq_str, phydev_name(phydev), + act ? " active" : ""); + found = 1; } if (!found && !mdio_node) { diff --git a/drivers/net/ethernet/sun/Kconfig b/drivers/net/ethernet/sun/Kconfig index a4b40e3015e5..b2caf5132bd2 100644 --- a/drivers/net/ethernet/sun/Kconfig +++ b/drivers/net/ethernet/sun/Kconfig @@ -70,19 +70,23 @@ config CASSINI <http://docs.oracle.com/cd/E19113-01/giga.ether.pci/817-4341-10/817-4341-10.pdf>. config SUNVNET_COMMON - bool + tristate "Common routines to support Sun Virtual Networking" depends on SUN_LDOMS - default y if SUN_LDOMS + default m config SUNVNET tristate "Sun Virtual Network support" + default m depends on SUN_LDOMS + depends on SUNVNET_COMMON ---help--- Support for virtual network devices under Sun Logical Domains. config LDMVSW tristate "Sun4v LDoms Virtual Switch support" + default m depends on SUN_LDOMS + depends on SUNVNET_COMMON ---help--- Support for virtual switch devices under Sun4v Logical Domains. This driver adds a network interface for every vsw-port node diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 335b87660638..89952deae47f 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -41,11 +41,11 @@ static u8 vsw_port_hwaddr[ETH_ALEN] = {0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; #define DRV_MODULE_NAME "ldmvsw" -#define DRV_MODULE_VERSION "1.0" -#define DRV_MODULE_RELDATE "Jan 15, 2016" +#define DRV_MODULE_VERSION "1.1" +#define DRV_MODULE_RELDATE "February 3, 2017" static char version[] = - DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")"; MODULE_AUTHOR("Oracle"); MODULE_DESCRIPTION("Sun4v LDOM Virtual Switch Driver"); MODULE_LICENSE("GPL"); @@ -234,8 +234,7 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[], dev->ethtool_ops = &vsw_ethtool_ops; dev->watchdog_timeo = VSW_TX_TIMEOUT; - dev->hw_features = NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | - NETIF_F_HW_CSUM | NETIF_F_SG; + dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG; dev->features = dev->hw_features; /* MTU range: 68 - 65535 */ @@ -259,11 +258,6 @@ static struct vio_driver_ops vsw_vio_ops = { .handshake_complete = sunvnet_handshake_complete_common, }; -static void print_version(void) -{ - printk_once(KERN_INFO "%s", version); -} - static const char *remote_macaddr_prop = "remote-mac-address"; static const char *id_prop = "id"; @@ -279,8 +273,6 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) const u64 *port_id; u64 handle; - print_version(); - hp = mdesc_grab(); rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); @@ -327,7 +319,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) port->vp = vp; port->dev = dev; port->switch_port = 1; - port->tso = true; + port->tso = false; /* no tso in vsw, misbehaves in bridge */ port->tsolen = 0; /* Mark the port as belonging to ldmvsw which directs the @@ -457,6 +449,7 @@ static struct vio_driver vsw_port_driver = { static int __init vsw_init(void) { + pr_info("%s\n", version); return vio_register_driver(&vsw_port_driver); } diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 5356a7074796..4cc2571f71c6 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -38,11 +38,11 @@ #define VNET_TX_TIMEOUT (5 * HZ) #define DRV_MODULE_NAME "sunvnet" -#define DRV_MODULE_VERSION "1.0" -#define DRV_MODULE_RELDATE "June 25, 2007" +#define DRV_MODULE_VERSION "2.0" +#define DRV_MODULE_RELDATE "February 3, 2017" static char version[] = - DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + DRV_MODULE_NAME " " DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")"; MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); MODULE_DESCRIPTION("Sun LDOM virtual network driver"); MODULE_LICENSE("GPL"); @@ -303,11 +303,6 @@ static struct vio_driver_ops vnet_vio_ops = { .handshake_complete = sunvnet_handshake_complete_common, }; -static void print_version(void) -{ - printk_once(KERN_INFO "%s", version); -} - const char *remote_macaddr_prop = "remote-mac-address"; static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) @@ -319,8 +314,6 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) const u64 *rmac; int len, i, err, switch_port; - print_version(); - hp = mdesc_grab(); vp = vnet_find_parent(hp, vdev->mp, vdev); @@ -446,6 +439,7 @@ static struct vio_driver vnet_port_driver = { static int __init vnet_init(void) { + pr_info("%s\n", version); return vio_register_driver(&vnet_port_driver); } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index 191c8ade6155..fa2d11ca9b81 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -37,6 +37,11 @@ */ #define VNET_MAX_RETRIES 10 +MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_DESCRIPTION("Sun LDOM virtual network support library"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.1"); + static int __vnet_tx_trigger(struct vnet_port *port, u32 start); static void vnet_port_reset(struct vnet_port *port); @@ -181,6 +186,7 @@ static int handle_attr_info(struct vio_driver_state *vio, } else { pkt->cflags &= ~VNET_LSO_IPV4_CAPAB; pkt->ipv4_lso_maxlen = 0; + port->tsolen = 0; } /* for version >= 1.6, ACK packet mode we support */ @@ -714,12 +720,8 @@ static void maybe_tx_wakeup(struct vnet_port *port) txq = netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port), port->q_index); __netif_tx_lock(txq, smp_processor_id()); - if (likely(netif_tx_queue_stopped(txq))) { - struct vio_dring_state *dr; - - dr = &port->vio.drings[VIO_DRIVER_TX_RING]; + if (likely(netif_tx_queue_stopped(txq))) netif_tx_wake_queue(txq); - } __netif_tx_unlock(txq); } @@ -737,41 +739,37 @@ static int vnet_event_napi(struct vnet_port *port, int budget) struct vio_driver_state *vio = &port->vio; int tx_wakeup, err; int npkts = 0; - int event = (port->rx_event & LDC_EVENT_RESET); - -ldc_ctrl: - if (unlikely(event == LDC_EVENT_RESET || - event == LDC_EVENT_UP)) { - vio_link_state_change(vio, event); - - if (event == LDC_EVENT_RESET) { - vnet_port_reset(port); - vio_port_up(vio); - - /* If the device is running but its tx queue was - * stopped (due to flow control), restart it. - * This is necessary since vnet_port_reset() - * clears the tx drings and thus we may never get - * back a VIO_TYPE_DATA ACK packet - which is - * the normal mechanism to restart the tx queue. - */ - if (netif_running(dev)) - maybe_tx_wakeup(port); - } + + /* we don't expect any other bits */ + BUG_ON(port->rx_event & ~(LDC_EVENT_DATA_READY | + LDC_EVENT_RESET | + LDC_EVENT_UP)); + + /* RESET takes precedent over any other event */ + if (port->rx_event & LDC_EVENT_RESET) { + vio_link_state_change(vio, LDC_EVENT_RESET); + vnet_port_reset(port); + vio_port_up(vio); + + /* If the device is running but its tx queue was + * stopped (due to flow control), restart it. + * This is necessary since vnet_port_reset() + * clears the tx drings and thus we may never get + * back a VIO_TYPE_DATA ACK packet - which is + * the normal mechanism to restart the tx queue. + */ + if (netif_running(dev)) + maybe_tx_wakeup(port); + port->rx_event = 0; return 0; } - /* We may have multiple LDC events in rx_event. Unroll send_events() */ - event = (port->rx_event & LDC_EVENT_UP); - port->rx_event &= ~(LDC_EVENT_RESET | LDC_EVENT_UP); - if (event == LDC_EVENT_UP) - goto ldc_ctrl; - event = port->rx_event; - if (!(event & LDC_EVENT_DATA_READY)) - return 0; - /* we dont expect any other bits than RESET, UP, DATA_READY */ - BUG_ON(event != LDC_EVENT_DATA_READY); + if (port->rx_event & LDC_EVENT_UP) { + vio_link_state_change(vio, LDC_EVENT_UP); + port->rx_event = 0; + return 0; + } err = 0; tx_wakeup = 0; @@ -794,25 +792,25 @@ ldc_ctrl: pkt->start_idx = vio_dring_next(dr, port->napi_stop_idx); pkt->end_idx = -1; - goto napi_resume; - } - err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); - if (unlikely(err < 0)) { - if (err == -ECONNRESET) - vio_conn_reset(vio); - break; + } else { + err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf)); + if (unlikely(err < 0)) { + if (err == -ECONNRESET) + vio_conn_reset(vio); + break; + } + if (err == 0) + break; + viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", + msgbuf.tag.type, + msgbuf.tag.stype, + msgbuf.tag.stype_env, + msgbuf.tag.sid); + err = vio_validate_sid(vio, &msgbuf.tag); + if (err < 0) + break; } - if (err == 0) - break; - viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n", - msgbuf.tag.type, - msgbuf.tag.stype, - msgbuf.tag.stype_env, - msgbuf.tag.sid); - err = vio_validate_sid(vio, &msgbuf.tag); - if (err < 0) - break; -napi_resume: + if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) { if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) { if (!sunvnet_port_is_up_common(port)) { @@ -1256,10 +1254,8 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); port = vnet_tx_port(skb, dev); - if (unlikely(!port)) { - rcu_read_unlock(); + if (unlikely(!port)) goto out_dropped; - } if (skb_is_gso(skb) && skb->len > port->tsolen) { err = vnet_handle_offloads(port, skb, vnet_tx_port); @@ -1284,7 +1280,6 @@ int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, fl4.saddr = ip_hdr(skb)->saddr; rt = ip_route_output_key(dev_net(dev), &fl4); - rcu_read_unlock(); if (!IS_ERR(rt)) { skb_dst_set(skb, &rt->dst); icmp_send(skb, ICMP_DEST_UNREACH, @@ -1426,6 +1421,7 @@ ldc_start_done: dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); if (unlikely(vnet_tx_dring_avail(dr) < 1)) { netif_tx_stop_queue(txq); + smp_rmb(); if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr)) netif_tx_wake_queue(txq); } @@ -1443,8 +1439,7 @@ out_dropped: jiffies + VNET_CLEAN_TIMEOUT); else if (port) del_timer(&port->clean_timer); - if (port) - rcu_read_unlock(); + rcu_read_unlock(); if (skb) dev_kfree_skb(skb); vnet_free_skbs(freeskbs); @@ -1641,7 +1636,7 @@ static void vnet_port_reset(struct vnet_port *port) del_timer(&port->clean_timer); sunvnet_port_free_tx_bufs_common(port); port->rmtu = 0; - port->tso = true; + port->tso = (port->vsw == 0); /* no tso in vsw, misbehaves in bridge */ port->tsolen = 0; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 503fa8af37d7..9f3d9c67e3fe 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -399,6 +399,7 @@ struct cpsw_common { struct cpts *cpts; int rx_ch_num, tx_ch_num; int speed; + int usage_count; }; struct cpsw_priv { @@ -671,18 +672,6 @@ static void cpsw_intr_disable(struct cpsw_common *cpsw) return; } -static int cpsw_get_usage_count(struct cpsw_common *cpsw) -{ - u32 i; - u32 usage_count = 0; - - for (i = 0; i < cpsw->data.slaves; i++) - if (cpsw->slaves[i].ndev && netif_running(cpsw->slaves[i].ndev)) - usage_count++; - - return usage_count; -} - static void cpsw_tx_handler(void *token, int len, int status) { struct netdev_queue *txq; @@ -716,8 +705,7 @@ static void cpsw_rx_handler(void *token, int len, int status) if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { /* In dual emac mode check for all interfaces */ - if (cpsw->data.dual_emac && - cpsw_get_usage_count(cpsw) && + if (cpsw->data.dual_emac && cpsw->usage_count && (status >= 0)) { /* The packet received is for the interface which * is already down and the other interface is up @@ -1492,11 +1480,8 @@ static int cpsw_ndo_open(struct net_device *ndev) CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg), CPSW_RTL_VERSION(reg)); - /* Initialize host and slave ports. - * Given ndev is marked as opened already, so init port only if 1 ndev - * is opened - */ - if (cpsw_get_usage_count(cpsw) < 2) + /* Initialize host and slave ports */ + if (!cpsw->usage_count) cpsw_init_host_port(priv); for_each_slave(priv, cpsw_slave_open, priv); @@ -1507,10 +1492,8 @@ static int cpsw_ndo_open(struct net_device *ndev) cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); - /* Given ndev is marked as opened already, so if more ndev - * are opened - no need to init shared resources. - */ - if (cpsw_get_usage_count(cpsw) < 2) { + /* initialize shared resources for every ndev */ + if (!cpsw->usage_count) { /* disable priority elevation */ __raw_writel(0, &cpsw->regs->ptype); @@ -1552,6 +1535,7 @@ static int cpsw_ndo_open(struct net_device *ndev) cpdma_ctlr_start(cpsw->dma); cpsw_intr_enable(cpsw); + cpsw->usage_count++; return 0; @@ -1572,10 +1556,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) netif_tx_stop_all_queues(priv->ndev); netif_carrier_off(priv->ndev); - /* Given ndev is marked as close already, - * so disable shared resources if no open devices - */ - if (!cpsw_get_usage_count(cpsw)) { + if (cpsw->usage_count <= 1) { napi_disable(&cpsw->napi_rx); napi_disable(&cpsw->napi_tx); cpts_unregister(cpsw->cpts); @@ -1588,6 +1569,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) if (cpsw_need_resplit(cpsw)) cpsw_split_res(ndev); + cpsw->usage_count--; pm_runtime_put_sync(cpsw->dev); return 0; } @@ -2393,7 +2375,7 @@ static int cpsw_resume_data_pass(struct net_device *ndev) netif_dormant_off(slave->ndev); /* After this receive is started */ - if (cpsw_get_usage_count(cpsw)) { + if (cpsw->usage_count) { ret = cpsw_fill_rx_channels(priv); if (ret) return ret; @@ -2447,7 +2429,7 @@ static int cpsw_set_channels(struct net_device *ndev, } } - if (cpsw_get_usage_count(cpsw)) + if (cpsw->usage_count) cpsw_split_res(ndev); ret = cpsw_resume_data_pass(ndev); @@ -2529,7 +2511,7 @@ static int cpsw_set_ringparam(struct net_device *ndev, cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); - if (cpsw_get_usage_count(cpsw)) + if (cpsw->usage_count) cpdma_chan_split_pool(cpsw->dma); ret = cpsw_resume_data_pass(ndev); @@ -3050,7 +3032,7 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_dma_ret; } - ale_params.dev = &ndev->dev; + ale_params.dev = &pdev->dev; ale_params.ale_ageout = ale_ageout; ale_params.ale_entries = data->ale_entries; ale_params.ale_ports = data->slaves; @@ -3225,7 +3207,7 @@ static int cpsw_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Select default pin state */ pinctrl_pm_select_default_state(dev); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index e3070fd88bce..69e31ceccfae 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -100,6 +100,14 @@ /* BUFFER_ALIGN(adr) calculates the number of bytes to the next alignment. */ #define BUFFER_ALIGN(adr) ((ALIGNMENT - ((u32) adr)) % ALIGNMENT) +#ifdef __BIG_ENDIAN +#define xemaclite_readl ioread32be +#define xemaclite_writel iowrite32be +#else +#define xemaclite_readl ioread32 +#define xemaclite_writel iowrite32 +#endif + /** * struct net_local - Our private per device data * @ndev: instance of the network device @@ -156,15 +164,15 @@ static void xemaclite_enable_interrupts(struct net_local *drvdata) u32 reg_data; /* Enable the Tx interrupts for the first Buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET); - __raw_writel(reg_data | XEL_TSR_XMIT_IE_MASK, - drvdata->base_addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data | XEL_TSR_XMIT_IE_MASK, + drvdata->base_addr + XEL_TSR_OFFSET); /* Enable the Rx interrupts for the first buffer */ - __raw_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET); + xemaclite_writel(XEL_RSR_RECV_IE_MASK, drvdata->base_addr + XEL_RSR_OFFSET); /* Enable the Global Interrupt Enable */ - __raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); + xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); } /** @@ -179,17 +187,17 @@ static void xemaclite_disable_interrupts(struct net_local *drvdata) u32 reg_data; /* Disable the Global Interrupt Enable */ - __raw_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); + xemaclite_writel(XEL_GIER_GIE_MASK, drvdata->base_addr + XEL_GIER_OFFSET); /* Disable the Tx interrupts for the first buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_TSR_OFFSET); - __raw_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK), - drvdata->base_addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data & (~XEL_TSR_XMIT_IE_MASK), + drvdata->base_addr + XEL_TSR_OFFSET); /* Disable the Rx interrupts for the first buffer */ - reg_data = __raw_readl(drvdata->base_addr + XEL_RSR_OFFSET); - __raw_writel(reg_data & (~XEL_RSR_RECV_IE_MASK), - drvdata->base_addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(drvdata->base_addr + XEL_RSR_OFFSET); + xemaclite_writel(reg_data & (~XEL_RSR_RECV_IE_MASK), + drvdata->base_addr + XEL_RSR_OFFSET); } /** @@ -321,7 +329,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, byte_count = ETH_FRAME_LEN; /* Check if the expected buffer is available */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK)) == 0) { @@ -334,7 +342,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, addr = (void __iomem __force *)((u32 __force)addr ^ XEL_BUFFER_OFFSET); - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); if ((reg_data & (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK)) != 0) @@ -345,16 +353,16 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, /* Write the frame to the buffer */ xemaclite_aligned_write(data, (u32 __force *) addr, byte_count); - __raw_writel((byte_count & XEL_TPLR_LENGTH_MASK), - addr + XEL_TPLR_OFFSET); + xemaclite_writel((byte_count & XEL_TPLR_LENGTH_MASK), + addr + XEL_TPLR_OFFSET); /* Update the Tx Status Register to indicate that there is a * frame to send. Set the XEL_TSR_XMIT_ACTIVE_MASK flag which * is used by the interrupt handler to check whether a frame * has been transmitted */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); reg_data |= (XEL_TSR_XMIT_BUSY_MASK | XEL_TSR_XMIT_ACTIVE_MASK); - __raw_writel(reg_data, addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data, addr + XEL_TSR_OFFSET); return 0; } @@ -369,7 +377,7 @@ static int xemaclite_send_data(struct net_local *drvdata, u8 *data, * * Return: Total number of bytes received */ -static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) +static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen) { void __iomem *addr; u16 length, proto_type; @@ -379,7 +387,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) addr = (drvdata->base_addr + drvdata->next_rx_buf_to_use); /* Verify which buffer has valid data */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); if ((reg_data & XEL_RSR_RECV_DONE_MASK) == XEL_RSR_RECV_DONE_MASK) { if (drvdata->rx_ping_pong != 0) @@ -396,27 +404,28 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) return 0; /* No data was available */ /* Verify that buffer has valid data */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); if ((reg_data & XEL_RSR_RECV_DONE_MASK) != XEL_RSR_RECV_DONE_MASK) return 0; /* No data was available */ } /* Get the protocol type of the ethernet frame that arrived */ - proto_type = ((ntohl(__raw_readl(addr + XEL_HEADER_OFFSET + + proto_type = ((ntohl(xemaclite_readl(addr + XEL_HEADER_OFFSET + XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & XEL_RPLR_LENGTH_MASK); /* Check if received ethernet frame is a raw ethernet frame * or an IP packet or an ARP packet */ - if (proto_type > (ETH_FRAME_LEN + ETH_FCS_LEN)) { + if (proto_type > ETH_DATA_LEN) { if (proto_type == ETH_P_IP) { - length = ((ntohl(__raw_readl(addr + + length = ((ntohl(xemaclite_readl(addr + XEL_HEADER_IP_LENGTH_OFFSET + XEL_RXBUFF_OFFSET)) >> XEL_HEADER_SHIFT) & XEL_RPLR_LENGTH_MASK); + length = min_t(u16, length, ETH_DATA_LEN); length += ETH_HLEN + ETH_FCS_LEN; } else if (proto_type == ETH_P_ARP) @@ -429,14 +438,17 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data) /* Use the length in the frame, plus the header and trailer */ length = proto_type + ETH_HLEN + ETH_FCS_LEN; + if (WARN_ON(length > maxlen)) + length = maxlen; + /* Read from the EmacLite device */ xemaclite_aligned_read((u32 __force *) (addr + XEL_RXBUFF_OFFSET), data, length); /* Acknowledge the frame */ - reg_data = __raw_readl(addr + XEL_RSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_RSR_OFFSET); reg_data &= ~XEL_RSR_RECV_DONE_MASK; - __raw_writel(reg_data, addr + XEL_RSR_OFFSET); + xemaclite_writel(reg_data, addr + XEL_RSR_OFFSET); return length; } @@ -463,14 +475,14 @@ static void xemaclite_update_address(struct net_local *drvdata, xemaclite_aligned_write(address_ptr, (u32 __force *) addr, ETH_ALEN); - __raw_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET); + xemaclite_writel(ETH_ALEN, addr + XEL_TPLR_OFFSET); /* Update the MAC address in the EmacLite */ - reg_data = __raw_readl(addr + XEL_TSR_OFFSET); - __raw_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET); + reg_data = xemaclite_readl(addr + XEL_TSR_OFFSET); + xemaclite_writel(reg_data | XEL_TSR_PROG_MAC_ADDR, addr + XEL_TSR_OFFSET); /* Wait for EmacLite to finish with the MAC address update */ - while ((__raw_readl(addr + XEL_TSR_OFFSET) & + while ((xemaclite_readl(addr + XEL_TSR_OFFSET) & XEL_TSR_PROG_MAC_ADDR) != 0) ; } @@ -603,7 +615,7 @@ static void xemaclite_rx_handler(struct net_device *dev) skb_reserve(skb, 2); - len = xemaclite_recv_data(lp, (u8 *) skb->data); + len = xemaclite_recv_data(lp, (u8 *) skb->data, len); if (!len) { dev->stats.rx_errors++; @@ -640,32 +652,32 @@ static irqreturn_t xemaclite_interrupt(int irq, void *dev_id) u32 tx_status; /* Check if there is Rx Data available */ - if ((__raw_readl(base_addr + XEL_RSR_OFFSET) & + if ((xemaclite_readl(base_addr + XEL_RSR_OFFSET) & XEL_RSR_RECV_DONE_MASK) || - (__raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET) + (xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_RSR_OFFSET) & XEL_RSR_RECV_DONE_MASK)) xemaclite_rx_handler(dev); /* Check if the Transmission for the first buffer is completed */ - tx_status = __raw_readl(base_addr + XEL_TSR_OFFSET); + tx_status = xemaclite_readl(base_addr + XEL_TSR_OFFSET); if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) && (tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) { tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK; - __raw_writel(tx_status, base_addr + XEL_TSR_OFFSET); + xemaclite_writel(tx_status, base_addr + XEL_TSR_OFFSET); tx_complete = true; } /* Check if the Transmission for the second buffer is completed */ - tx_status = __raw_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); + tx_status = xemaclite_readl(base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); if (((tx_status & XEL_TSR_XMIT_BUSY_MASK) == 0) && (tx_status & XEL_TSR_XMIT_ACTIVE_MASK) != 0) { tx_status &= ~XEL_TSR_XMIT_ACTIVE_MASK; - __raw_writel(tx_status, base_addr + XEL_BUFFER_OFFSET + - XEL_TSR_OFFSET); + xemaclite_writel(tx_status, base_addr + XEL_BUFFER_OFFSET + + XEL_TSR_OFFSET); tx_complete = true; } @@ -698,7 +710,7 @@ static int xemaclite_mdio_wait(struct net_local *lp) /* wait for the MDIO interface to not be busy or timeout after some time. */ - while (__raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) & + while (xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET) & XEL_MDIOCTRL_MDIOSTS_MASK) { if (time_before_eq(end, jiffies)) { WARN_ON(1); @@ -734,17 +746,17 @@ static int xemaclite_mdio_read(struct mii_bus *bus, int phy_id, int reg) * MDIO Address register. Set the Status bit in the MDIO Control * register to start a MDIO read transaction. */ - ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); - __raw_writel(XEL_MDIOADDR_OP_MASK | - ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), - lp->base_addr + XEL_MDIOADDR_OFFSET); - __raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(XEL_MDIOADDR_OP_MASK | + ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), + lp->base_addr + XEL_MDIOADDR_OFFSET); + xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); if (xemaclite_mdio_wait(lp)) return -ETIMEDOUT; - rc = __raw_readl(lp->base_addr + XEL_MDIORD_OFFSET); + rc = xemaclite_readl(lp->base_addr + XEL_MDIORD_OFFSET); dev_dbg(&lp->ndev->dev, "xemaclite_mdio_read(phy_id=%i, reg=%x) == %x\n", @@ -781,13 +793,13 @@ static int xemaclite_mdio_write(struct mii_bus *bus, int phy_id, int reg, * Data register. Finally, set the Status bit in the MDIO Control * register to start a MDIO write transaction. */ - ctrl_reg = __raw_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); - __raw_writel(~XEL_MDIOADDR_OP_MASK & - ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), - lp->base_addr + XEL_MDIOADDR_OFFSET); - __raw_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET); - __raw_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + ctrl_reg = xemaclite_readl(lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(~XEL_MDIOADDR_OP_MASK & + ((phy_id << XEL_MDIOADDR_PHYADR_SHIFT) | reg), + lp->base_addr + XEL_MDIOADDR_OFFSET); + xemaclite_writel(val, lp->base_addr + XEL_MDIOWR_OFFSET); + xemaclite_writel(ctrl_reg | XEL_MDIOCTRL_MDIOSTS_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); return 0; } @@ -834,8 +846,8 @@ static int xemaclite_mdio_setup(struct net_local *lp, struct device *dev) /* Enable the MDIO bus by asserting the enable bit in MDIO Control * register. */ - __raw_writel(XEL_MDIOCTRL_MDIOEN_MASK, - lp->base_addr + XEL_MDIOCTRL_OFFSET); + xemaclite_writel(XEL_MDIOCTRL_MDIOEN_MASK, + lp->base_addr + XEL_MDIOCTRL_OFFSET); bus = mdiobus_alloc(); if (!bus) { @@ -1126,8 +1138,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev) } /* Clear the Tx CSR's in case this is a restart */ - __raw_writel(0, lp->base_addr + XEL_TSR_OFFSET); - __raw_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); + xemaclite_writel(0, lp->base_addr + XEL_TSR_OFFSET); + xemaclite_writel(0, lp->base_addr + XEL_BUFFER_OFFSET + XEL_TSR_OFFSET); /* Set the MAC address in the EmacLite device */ xemaclite_update_address(lp, ndev->dev_addr); diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c index 44e4f386a5dc..be4ea6aa57a9 100644 --- a/drivers/net/irda/au1k_ir.c +++ b/drivers/net/irda/au1k_ir.c @@ -25,7 +25,6 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/types.h> -#include <linux/ioport.h> #include <net/irda/irda.h> #include <net/irda/irmod.h> @@ -169,8 +168,6 @@ struct au1k_private { u32 speed; u32 newspeed; - struct timer_list timer; - struct resource *ioarea; struct au1k_irda_platform_data *platdata; struct clk *irda_clk; @@ -178,8 +175,6 @@ struct au1k_private { static int qos_mtt_bits = 0x07; /* 1 ms or more */ -#define RUN_AT(x) (jiffies + (x)) - static void au1k_irda_plat_set_phy_mode(struct au1k_private *p, int mode) { if (p->platdata && p->platdata->set_phy_mode) @@ -620,8 +615,6 @@ static int au1k_irda_start(struct net_device *dev) /* power up */ au1k_irda_plat_set_phy_mode(aup, AU1000_IRDA_PHY_MODE_SIR); - aup->timer.expires = RUN_AT((3 * HZ)); - aup->timer.data = (unsigned long)dev; return 0; } @@ -642,7 +635,6 @@ static int au1k_irda_stop(struct net_device *dev) } netif_stop_queue(dev); - del_timer(&aup->timer); /* disable the interrupt */ free_irq(aup->irq_tx, dev); diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 3ce1f7da8647..530586be05b4 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -113,10 +113,10 @@ struct xenvif_stats { * A subset of struct net_device_stats that contains only the * fields that are updated in netback.c for each queue. */ - unsigned int rx_bytes; - unsigned int rx_packets; - unsigned int tx_bytes; - unsigned int tx_packets; + u64 rx_bytes; + u64 rx_packets; + u64 tx_bytes; + u64 tx_packets; /* Additional stats used by xenvif */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 1073b27e54aa..a2d326760a72 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -221,10 +221,10 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned long rx_bytes = 0; - unsigned long rx_packets = 0; - unsigned long tx_bytes = 0; - unsigned long tx_packets = 0; + u64 rx_bytes = 0; + u64 rx_packets = 0; + u64 tx_bytes = 0; + u64 tx_packets = 0; unsigned int index; spin_lock(&vif->lock); diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index 717529331dac..2dd1c68e6de8 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -433,6 +433,17 @@ static int pcie_pme_resume(struct pcie_device *srv) return 0; } +/** + * pcie_pme_remove - Prepare PCIe PME service device for removal. + * @srv - PCIe service device to remove. + */ +static void pcie_pme_remove(struct pcie_device *srv) +{ + pcie_pme_suspend(srv); + free_irq(srv->irq, srv); + kfree(get_service_data(srv)); +} + static struct pcie_port_service_driver pcie_pme_driver = { .name = "pcie_pme", .port_type = PCI_EXP_TYPE_ROOT_PORT, @@ -441,6 +452,7 @@ static struct pcie_port_service_driver pcie_pme_driver = { .probe = pcie_pme_probe, .suspend = pcie_pme_suspend, .resume = pcie_pme_resume, + .remove = pcie_pme_remove, }; /** diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 9c13381b6966..e8142803a1a7 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -221,18 +221,17 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + err = ptp_populate_pin_groups(ptp); + if (err) + goto no_pin_groups; + /* Create a new device in our class. */ - ptp->dev = device_create(ptp_class, parent, ptp->devid, ptp, - "ptp%d", ptp->index); + ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, + ptp, ptp->pin_attr_groups, + "ptp%d", ptp->index); if (IS_ERR(ptp->dev)) goto no_device; - dev_set_drvdata(ptp->dev, ptp); - - err = ptp_populate_sysfs(ptp); - if (err) - goto no_sysfs; - /* Register a new PPS source. */ if (info->pps) { struct pps_source_info pps; @@ -260,10 +259,10 @@ no_clock: if (ptp->pps_source) pps_unregister_source(ptp->pps_source); no_pps: - ptp_cleanup_sysfs(ptp); -no_sysfs: device_destroy(ptp_class, ptp->devid); no_device: + ptp_cleanup_pin_groups(ptp); +no_pin_groups: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, index); @@ -282,8 +281,9 @@ int ptp_clock_unregister(struct ptp_clock *ptp) /* Release the clock's resources. */ if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - ptp_cleanup_sysfs(ptp); + device_destroy(ptp_class, ptp->devid); + ptp_cleanup_pin_groups(ptp); posix_clock_unregister(&ptp->clock); return 0; diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 9c5d41421b65..d95888974d0c 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -54,6 +54,8 @@ struct ptp_clock { struct device_attribute *pin_dev_attr; struct attribute **pin_attr; struct attribute_group pin_attr_group; + /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ + const struct attribute_group *pin_attr_groups[2]; }; /* @@ -94,8 +96,7 @@ uint ptp_poll(struct posix_clock *pc, extern const struct attribute_group *ptp_groups[]; -int ptp_cleanup_sysfs(struct ptp_clock *ptp); - -int ptp_populate_sysfs(struct ptp_clock *ptp); +int ptp_populate_pin_groups(struct ptp_clock *ptp); +void ptp_cleanup_pin_groups(struct ptp_clock *ptp); #endif diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index 53d43954a974..48401dfcd999 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -46,27 +46,6 @@ PTP_SHOW_INT(n_periodic_outputs, n_per_out); PTP_SHOW_INT(n_programmable_pins, n_pins); PTP_SHOW_INT(pps_available, pps); -static struct attribute *ptp_attrs[] = { - &dev_attr_clock_name.attr, - &dev_attr_max_adjustment.attr, - &dev_attr_n_alarms.attr, - &dev_attr_n_external_timestamps.attr, - &dev_attr_n_periodic_outputs.attr, - &dev_attr_n_programmable_pins.attr, - &dev_attr_pps_available.attr, - NULL, -}; - -static const struct attribute_group ptp_group = { - .attrs = ptp_attrs, -}; - -const struct attribute_group *ptp_groups[] = { - &ptp_group, - NULL, -}; - - static ssize_t extts_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -91,6 +70,7 @@ static ssize_t extts_enable_store(struct device *dev, out: return err; } +static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); static ssize_t extts_fifo_show(struct device *dev, struct device_attribute *attr, char *page) @@ -124,6 +104,7 @@ out: mutex_unlock(&ptp->tsevq_mux); return cnt; } +static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); static ssize_t period_store(struct device *dev, struct device_attribute *attr, @@ -151,6 +132,7 @@ static ssize_t period_store(struct device *dev, out: return err; } +static DEVICE_ATTR(period, 0220, NULL, period_store); static ssize_t pps_enable_store(struct device *dev, struct device_attribute *attr, @@ -177,6 +159,57 @@ static ssize_t pps_enable_store(struct device *dev, out: return err; } +static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); + +static struct attribute *ptp_attrs[] = { + &dev_attr_clock_name.attr, + + &dev_attr_max_adjustment.attr, + &dev_attr_n_alarms.attr, + &dev_attr_n_external_timestamps.attr, + &dev_attr_n_periodic_outputs.attr, + &dev_attr_n_programmable_pins.attr, + &dev_attr_pps_available.attr, + + &dev_attr_extts_enable.attr, + &dev_attr_fifo.attr, + &dev_attr_period.attr, + &dev_attr_pps_enable.attr, + NULL +}; + +static umode_t ptp_is_attribute_visible(struct kobject *kobj, + struct attribute *attr, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct ptp_clock *ptp = dev_get_drvdata(dev); + struct ptp_clock_info *info = ptp->info; + umode_t mode = attr->mode; + + if (attr == &dev_attr_extts_enable.attr || + attr == &dev_attr_fifo.attr) { + if (!info->n_ext_ts) + mode = 0; + } else if (attr == &dev_attr_period.attr) { + if (!info->n_per_out) + mode = 0; + } else if (attr == &dev_attr_pps_enable.attr) { + if (!info->pps) + mode = 0; + } + + return mode; +} + +static const struct attribute_group ptp_group = { + .is_visible = ptp_is_attribute_visible, + .attrs = ptp_attrs, +}; + +const struct attribute_group *ptp_groups[] = { + &ptp_group, + NULL +}; static int ptp_pin_name2index(struct ptp_clock *ptp, const char *name) { @@ -235,47 +268,20 @@ static ssize_t ptp_pin_store(struct device *dev, struct device_attribute *attr, return count; } -static DEVICE_ATTR(extts_enable, 0220, NULL, extts_enable_store); -static DEVICE_ATTR(fifo, 0444, extts_fifo_show, NULL); -static DEVICE_ATTR(period, 0220, NULL, period_store); -static DEVICE_ATTR(pps_enable, 0220, NULL, pps_enable_store); - -int ptp_cleanup_sysfs(struct ptp_clock *ptp) +int ptp_populate_pin_groups(struct ptp_clock *ptp) { - struct device *dev = ptp->dev; - struct ptp_clock_info *info = ptp->info; - - if (info->n_ext_ts) { - device_remove_file(dev, &dev_attr_extts_enable); - device_remove_file(dev, &dev_attr_fifo); - } - if (info->n_per_out) - device_remove_file(dev, &dev_attr_period); - - if (info->pps) - device_remove_file(dev, &dev_attr_pps_enable); - - if (info->n_pins) { - sysfs_remove_group(&dev->kobj, &ptp->pin_attr_group); - kfree(ptp->pin_attr); - kfree(ptp->pin_dev_attr); - } - return 0; -} - -static int ptp_populate_pins(struct ptp_clock *ptp) -{ - struct device *dev = ptp->dev; struct ptp_clock_info *info = ptp->info; int err = -ENOMEM, i, n_pins = info->n_pins; - ptp->pin_dev_attr = kzalloc(n_pins * sizeof(*ptp->pin_dev_attr), + if (!n_pins) + return 0; + + ptp->pin_dev_attr = kcalloc(n_pins, sizeof(*ptp->pin_dev_attr), GFP_KERNEL); if (!ptp->pin_dev_attr) goto no_dev_attr; - ptp->pin_attr = kzalloc((1 + n_pins) * sizeof(struct attribute *), - GFP_KERNEL); + ptp->pin_attr = kcalloc(1 + n_pins, sizeof(*ptp->pin_attr), GFP_KERNEL); if (!ptp->pin_attr) goto no_pin_attr; @@ -292,61 +298,18 @@ static int ptp_populate_pins(struct ptp_clock *ptp) ptp->pin_attr_group.name = "pins"; ptp->pin_attr_group.attrs = ptp->pin_attr; - err = sysfs_create_group(&dev->kobj, &ptp->pin_attr_group); - if (err) - goto no_group; + ptp->pin_attr_groups[0] = &ptp->pin_attr_group; + return 0; -no_group: - kfree(ptp->pin_attr); no_pin_attr: kfree(ptp->pin_dev_attr); no_dev_attr: return err; } -int ptp_populate_sysfs(struct ptp_clock *ptp) +void ptp_cleanup_pin_groups(struct ptp_clock *ptp) { - struct device *dev = ptp->dev; - struct ptp_clock_info *info = ptp->info; - int err; - - if (info->n_ext_ts) { - err = device_create_file(dev, &dev_attr_extts_enable); - if (err) - goto out1; - err = device_create_file(dev, &dev_attr_fifo); - if (err) - goto out2; - } - if (info->n_per_out) { - err = device_create_file(dev, &dev_attr_period); - if (err) - goto out3; - } - if (info->pps) { - err = device_create_file(dev, &dev_attr_pps_enable); - if (err) - goto out4; - } - if (info->n_pins) { - err = ptp_populate_pins(ptp); - if (err) - goto out5; - } - return 0; -out5: - if (info->pps) - device_remove_file(dev, &dev_attr_pps_enable); -out4: - if (info->n_per_out) - device_remove_file(dev, &dev_attr_period); -out3: - if (info->n_ext_ts) - device_remove_file(dev, &dev_attr_fifo); -out2: - if (info->n_ext_ts) - device_remove_file(dev, &dev_attr_extts_enable); -out1: - return err; + kfree(ptp->pin_attr); + kfree(ptp->pin_dev_attr); } diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 75f820ca17b7..27ff38f839fc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1583,7 +1583,7 @@ out: int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1612,7 +1612,7 @@ int zfcp_fsf_open_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fsowp_1", wka_port, req->req_id); return retval; } @@ -1638,7 +1638,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req) int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) { struct zfcp_qdio *qdio = wka_port->adapter->qdio; - struct zfcp_fsf_req *req = NULL; + struct zfcp_fsf_req *req; int retval = -EIO; spin_lock_irq(&qdio->req_q_lock); @@ -1667,7 +1667,7 @@ int zfcp_fsf_close_wka_port(struct zfcp_fc_wka_port *wka_port) zfcp_fsf_req_free(req); out: spin_unlock_irq(&qdio->req_q_lock); - if (req && !IS_ERR(req)) + if (!retval) zfcp_dbf_rec_run_wka("fscwp_1", wka_port, req->req_id); return retval; } diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 4f56b1003cc7..5b48bedd7c38 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -50,9 +50,13 @@ struct aac_common aac_config = { static inline int aac_is_msix_mode(struct aac_dev *dev) { - u32 status; + u32 status = 0; - status = src_readl(dev, MUnit.OMR); + if (dev->pdev->device == PMC_DEVICE_S6 || + dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8) { + status = src_readl(dev, MUnit.OMR); + } return (status & AAC_INT_MODE_MSIX); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 75f3fce1c867..0b5b423b1db0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -51,6 +51,7 @@ #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/pci.h> +#include <linux/pci-aspm.h> #include <linux/interrupt.h> #include <linux/aer.h> #include <linux/raid_class.h> @@ -4657,6 +4658,7 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; + unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); scmd = _scsih_scsi_lookup_get_clear(ioc, smid); @@ -4715,6 +4717,20 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); + + /* In case of bogus fw or device, we could end up having + * unaligned partial completion. We can force alignment here, + * then scsi-ml does not need to handle this misbehavior. + */ + sector_sz = scmd->device->sector_size; + if (unlikely(scmd->request->cmd_type == REQ_TYPE_FS && sector_sz && + xfer_cnt % sector_sz)) { + sdev_printk(KERN_INFO, scmd->device, + "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", + xfer_cnt, sector_sz); + xfer_cnt = round_down(xfer_cnt, sector_sz); + } + scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); @@ -8746,6 +8762,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) switch (hba_mpi_version) { case MPI2_VERSION: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM); /* Use mpt2sas driver host template for SAS 2.0 HBA's */ shost = scsi_host_alloc(&mpt2sas_driver_template, sizeof(struct MPT3SAS_ADAPTER)); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index dc88a09f9043..a94b0b6bd030 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3242,7 +3242,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha) * from a probe failure context. */ if (!ha->rsp_q_map || !ha->rsp_q_map[0]) - return; + goto free_irqs; rsp = ha->rsp_q_map[0]; if (ha->flags.msix_enabled) { @@ -3262,6 +3262,7 @@ qla2x00_free_irqs(scsi_qla_host_t *vha) free_irq(pci_irq_vector(ha->pdev, 0), rsp); } +free_irqs: pci_free_irq_vectors(ha->pdev); } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 0a000ecf0881..40660461a4b5 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1616,7 +1616,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) /* Don't abort commands in adapter during EEH * recovery as it's not accessible/responding. */ - if (!ha->flags.eeh_busy) { + if (GET_CMD_SP(sp) && !ha->flags.eeh_busy) { /* Get a reference to the sp and drop the lock. * The reference ensures this sp->done() call * - and not the call in qla2xxx_eh_abort() - diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index e349a3316303..2edbdcbf6432 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -351,7 +351,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; - struct afs_uuid *r; + struct uuid_v1 *r; unsigned loop; __be32 *b; int ret; @@ -381,15 +381,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); if (!call->request) return -ENOMEM; b = call->buffer; r = call->request; - r->time_low = ntohl(b[0]); - r->time_mid = ntohl(b[1]); - r->time_hi_and_version = ntohl(b[2]); + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); @@ -454,7 +454,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) static void SRXAFSCB_ProbeUuid(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - struct afs_uuid *r = call->request; + struct uuid_v1 *r = call->request; struct { __be32 match; @@ -477,7 +477,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) */ static int afs_deliver_cb_probe_uuid(struct afs_call *call) { - struct afs_uuid *r; + struct uuid_v1 *r; unsigned loop; __be32 *b; int ret; @@ -503,15 +503,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); if (!call->request) return -ENOMEM; b = call->buffer; r = call->request; - r->time_low = ntohl(b[0]); - r->time_mid = ntohl(b[1]); - r->time_hi_and_version = ntohl(b[2]); + r->time_low = b[0]; + r->time_mid = htons(ntohl(b[1])); + r->time_hi_and_version = htons(ntohl(b[2])); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); @@ -569,9 +569,9 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) memset(&reply, 0, sizeof(reply)); reply.ia.nifs = htonl(nifs); - reply.ia.uuid[0] = htonl(afs_uuid.time_low); - reply.ia.uuid[1] = htonl(afs_uuid.time_mid); - reply.ia.uuid[2] = htonl(afs_uuid.time_hi_and_version); + reply.ia.uuid[0] = afs_uuid.time_low; + reply.ia.uuid[1] = htonl(ntohs(afs_uuid.time_mid)); + reply.ia.uuid[2] = htonl(ntohs(afs_uuid.time_hi_and_version)); reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved); reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low); for (loop = 0; loop < 6; loop++) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 65504e218d35..8acf3670e756 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -19,6 +19,7 @@ #include <linux/sched.h> #include <linux/fscache.h> #include <linux/backing-dev.h> +#include <linux/uuid.h> #include <net/af_rxrpc.h> #include "afs.h" @@ -407,30 +408,6 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; -/* - * UUID definition [internet draft] - * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns - * increments since midnight 15th October 1582 - * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID - * time - * - the clock sequence is a 14-bit counter to avoid duplicate times - */ -struct afs_uuid { - u32 time_low; /* low part of timestamp */ - u16 time_mid; /* mid part of timestamp */ - u16 time_hi_and_version; /* high part of timestamp and version */ -#define AFS_UUID_TO_UNIX_TIME 0x01b21dd213814000ULL -#define AFS_UUID_TIMEHI_MASK 0x0fff -#define AFS_UUID_VERSION_TIME 0x1000 /* time-based UUID */ -#define AFS_UUID_VERSION_NAME 0x3000 /* name-based UUID */ -#define AFS_UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */ - u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ -#define AFS_UUID_CLOCKHI_MASK 0x3f -#define AFS_UUID_VARIANT_STD 0x80 - u8 clock_seq_low; /* clock seq low */ - u8 node[6]; /* spatially unique node ID (MAC addr) */ -}; - /*****************************************************************************/ /* * cache.c @@ -565,7 +542,7 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; -extern struct afs_uuid afs_uuid; +extern struct uuid_v1 afs_uuid; /* * misc.c @@ -584,6 +561,11 @@ extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); /* + * netdevices.c + */ +extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); + +/* * proc.c */ extern int afs_proc_init(void); @@ -647,12 +629,6 @@ extern int afs_fs_init(void); extern void afs_fs_exit(void); /* - * use-rtnetlink.c - */ -extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool); -extern int afs_get_MAC_address(u8 *, size_t); - -/* * vlclient.c */ extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, diff --git a/fs/afs/main.c b/fs/afs/main.c index f8188feb03ad..51d7d17bca57 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -31,53 +31,10 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -struct afs_uuid afs_uuid; +struct uuid_v1 afs_uuid; struct workqueue_struct *afs_wq; /* - * get a client UUID - */ -static int __init afs_get_client_UUID(void) -{ - struct timespec ts; - u64 uuidtime; - u16 clockseq; - int ret; - - /* read the MAC address of one of the external interfaces and construct - * a UUID from it */ - ret = afs_get_MAC_address(afs_uuid.node, sizeof(afs_uuid.node)); - if (ret < 0) - return ret; - - getnstimeofday(&ts); - uuidtime = (u64) ts.tv_sec * 1000 * 1000 * 10; - uuidtime += ts.tv_nsec / 100; - uuidtime += AFS_UUID_TO_UNIX_TIME; - afs_uuid.time_low = uuidtime; - afs_uuid.time_mid = uuidtime >> 32; - afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK; - afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME; - - get_random_bytes(&clockseq, 2); - afs_uuid.clock_seq_low = clockseq; - afs_uuid.clock_seq_hi_and_reserved = - (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK; - afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD; - - _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", - afs_uuid.time_low, - afs_uuid.time_mid, - afs_uuid.time_hi_and_version, - afs_uuid.clock_seq_hi_and_reserved, - afs_uuid.clock_seq_low, - afs_uuid.node[0], afs_uuid.node[1], afs_uuid.node[2], - afs_uuid.node[3], afs_uuid.node[4], afs_uuid.node[5]); - - return 0; -} - -/* * initialise the AFS client FS module */ static int __init afs_init(void) @@ -86,9 +43,7 @@ static int __init afs_init(void) printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n"); - ret = afs_get_client_UUID(); - if (ret < 0) - return ret; + generate_random_uuid((unsigned char *)&afs_uuid); /* create workqueue */ ret = -ENOMEM; diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c index 7ad36506c256..40b2bab3e401 100644 --- a/fs/afs/netdevices.c +++ b/fs/afs/netdevices.c @@ -12,27 +12,6 @@ #include "internal.h" /* - * get a MAC address from a random ethernet interface that has a real one - * - the buffer will normally be 6 bytes in size - */ -int afs_get_MAC_address(u8 *mac, size_t maclen) -{ - struct net_device *dev; - int ret = -ENODEV; - - BUG_ON(maclen != ETH_ALEN); - - rtnl_lock(); - dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); - if (dev) { - memcpy(mac, dev->dev_addr, maclen); - ret = 0; - } - rtnl_unlock(); - return ret; -} - -/* * get a list of this system's interface IPv4 addresses, netmasks and MTUs * - maxbufs must be at least 1 * - returns the number of interface records in the buffer diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 7f390849343b..c4444d6f439f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1024,6 +1024,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, unsigned long buf_offset; unsigned long current_buf_start; unsigned long start_byte; + unsigned long prev_start_byte; unsigned long working_bytes = total_out - buf_start; unsigned long bytes; char *kaddr; @@ -1071,26 +1072,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, if (!bio->bi_iter.bi_size) return 0; bvec = bio_iter_iovec(bio, bio->bi_iter); - + prev_start_byte = start_byte; start_byte = page_offset(bvec.bv_page) - disk_start; /* - * make sure our new page is covered by this - * working buffer + * We need to make sure we're only adjusting + * our offset into compression working buffer when + * we're switching pages. Otherwise we can incorrectly + * keep copying when we were actually done. */ - if (total_out <= start_byte) - return 1; + if (start_byte != prev_start_byte) { + /* + * make sure our new page is covered by this + * working buffer + */ + if (total_out <= start_byte) + return 1; - /* - * the next page in the biovec might not be adjacent - * to the last page, but it might still be found - * inside this working buffer. bump our offset pointer - */ - if (total_out > start_byte && - current_buf_start < start_byte) { - buf_offset = start_byte - buf_start; - working_bytes = total_out - start_byte; - current_buf_start = buf_start + buf_offset; + /* + * the next page in the biovec might not be adjacent + * to the last page, but it might still be found + * inside this working buffer. bump our offset pointer + */ + if (total_out > start_byte && + current_buf_start < start_byte) { + buf_offset = start_byte - buf_start; + working_bytes = total_out - start_byte; + current_buf_start = buf_start + buf_offset; + } } } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 33f967d30b2a..21e51b0ba188 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5653,6 +5653,10 @@ long btrfs_ioctl(struct file *file, unsigned int #ifdef CONFIG_COMPAT long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + /* + * These all access 32-bit values anyway so no further + * handling is necessary. + */ switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; @@ -5663,8 +5667,6 @@ long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; - default: - return -ENOIOCTLCMD; } return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4e06a27ed7f8..f11792672977 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -399,6 +399,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) { spin_lock(&fiq->waitq.lock); + if (test_bit(FR_FINISHED, &req->flags)) { + spin_unlock(&fiq->waitq.lock); + return; + } if (list_empty(&req->intr_entry)) { list_add_tail(&req->intr_entry, &fiq->interrupts); wake_up_locked(&fiq->waitq); @@ -1372,6 +1376,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, * code can Oops if the buffer persists after module unload. */ bufs[page_nr].ops = &nosteal_pipe_buf_ops; + bufs[page_nr].flags = 0; ret = add_to_pipe(pipe, &bufs[page_nr++]); if (unlikely(ret < 0)) break; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 94f50cac91c6..70e94170af85 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1420,26 +1420,32 @@ static struct shrinker glock_shrinker = { * @sdp: the filesystem * @bucket: the bucket * + * Note that the function can be called multiple times on the same + * object. So the user must ensure that the function can cope with + * that. */ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) { struct gfs2_glock *gl; - struct rhash_head *pos; - const struct bucket_table *tbl; - int i; + struct rhashtable_iter iter; - rcu_read_lock(); - tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(gl, pos, tbl, i, gl_node) { + rhashtable_walk_enter(&gl_hash_table, &iter); + + do { + gl = ERR_PTR(rhashtable_walk_start(&iter)); + if (gl) + continue; + + while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); - } - } - rcu_read_unlock(); - cond_resched(); + + rhashtable_walk_stop(&iter); + } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); + + rhashtable_walk_exit(&iter); } /** diff --git a/fs/splice.c b/fs/splice.c index 873d83104e79..4ef78aa8ef61 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -204,6 +204,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, buf->len = spd->partial[page_nr].len; buf->private = spd->partial[page_nr].private; buf->ops = spd->ops; + buf->flags = 0; pipe->nrbufs++; page_nr++; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 92bc89ae7e20..c970a25d2a49 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -21,20 +21,19 @@ struct cgroup_bpf { */ struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; + bool disallow_override[MAX_BPF_ATTACH_TYPE]; }; void cgroup_bpf_put(struct cgroup *cgrp); void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type); +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool overridable); /* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type); +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 54c82b5df0ba..f40f0ab3847a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -352,6 +352,7 @@ enum gro_result { GRO_HELD, GRO_NORMAL, GRO_DROP, + GRO_CONSUMED, }; typedef enum gro_result gro_result_t; @@ -1865,8 +1866,12 @@ struct net_device { struct pcpu_vstats __percpu *vstats; }; +#if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; +#endif +#if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; +#endif struct device dev; const struct attribute_group *sysfs_groups[4]; @@ -2663,6 +2668,19 @@ static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, remcsum_unadjust((__sum16 *)ptr, grc->delta); } +#ifdef CONFIG_XFRM_OFFLOAD +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + if (PTR_ERR(pp) != -EINPROGRESS) + NAPI_GRO_CB(skb)->flush |= flush; +} +#else +static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) +{ + NAPI_GRO_CB(skb)->flush |= flush; +} +#endif + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 3613d63cd5d0..4cd1f0ccfa36 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -96,6 +96,7 @@ struct qed_update_vport_params { struct qed_start_vport_params { bool remove_inner_vlan; + bool handle_ptp_pkts; bool gro_enable; bool drop_ttl0; u8 vport_id; @@ -159,6 +160,15 @@ struct qed_eth_cb_ops { void (*force_mac) (void *dev, u8 *mac, bool forced); }; +#define QED_MAX_PHC_DRIFT_PPB 291666666 + +enum qed_ptp_filter_type { + QED_PTP_FILTER_L2, + QED_PTP_FILTER_IPV4, + QED_PTP_FILTER_IPV4_IPV6, + QED_PTP_FILTER_L2_IPV4_IPV6 +}; + #ifdef CONFIG_DCB /* Prototype declaration of qed_eth_dcbnl_ops should match with the declaration * of dcbnl_rtnl_ops structure. @@ -218,6 +228,17 @@ struct qed_eth_dcbnl_ops { }; #endif +struct qed_eth_ptp_ops { + int (*hwtstamp_tx_on)(struct qed_dev *); + int (*cfg_rx_filters)(struct qed_dev *, enum qed_ptp_filter_type); + int (*read_rx_ts)(struct qed_dev *, u64 *); + int (*read_tx_ts)(struct qed_dev *, u64 *); + int (*read_cc)(struct qed_dev *, u64 *); + int (*disable)(struct qed_dev *); + int (*adjfreq)(struct qed_dev *, s32); + int (*enable)(struct qed_dev *); +}; + struct qed_eth_ops { const struct qed_common_ops *common; #ifdef CONFIG_QED_SRIOV @@ -226,6 +247,7 @@ struct qed_eth_ops { #ifdef CONFIG_DCB const struct qed_eth_dcbnl_ops *dcb; #endif + const struct qed_eth_ptp_ops *ptp; int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5c132d3188be..f2e12a845910 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -61,6 +61,7 @@ struct rhlist_head { /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets + * @nest: Number of bits of first-level nested table. * @rehash: Current bucket being rehashed * @hash_rnd: Random seed to fold into hash * @locks_mask: Mask to apply before accessing locks[] @@ -68,10 +69,12 @@ struct rhlist_head { * @walkers: List of active walkers * @rcu: RCU structure for freeing the table * @future_tbl: Table under construction during rehashing + * @ntbl: Nested table used when out of memory. * @buckets: size * hash buckets */ struct bucket_table { unsigned int size; + unsigned int nest; unsigned int rehash; u32 hash_rnd; unsigned int locks_mask; @@ -81,7 +84,7 @@ struct bucket_table { struct bucket_table __rcu *future_tbl; - struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; /** @@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void *arg); void rhashtable_destroy(struct rhashtable *ht); +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash); +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash); + #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) @@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_entry(tpos, pos, member) \ ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) +static inline struct rhash_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; +} + +static inline struct rhash_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) +{ + return unlikely(tbl->nest) ? rht_bucket_nested_insert(ht, tbl, hash) : + &tbl->buckets[hash]; +} + /** * rht_for_each_continue - continue iterating over hash chain * @pos: the &struct rhash_head to use as a loop cursor. @@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @hash: the hash value / bucket index */ #define rht_for_each(pos, tbl, hash) \ - rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_continue - continue iterating over hash chain @@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht); * @member: name of the &struct rhash_head within the hashable struct. */ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht); * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ - for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = !rht_is_a_nulls(pos) ? \ - rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ - (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next, \ - next = !rht_is_a_nulls(pos) ? \ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** @@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht); * traversal is guarded by rcu_read_lock(). */ #define rht_for_each_rcu(pos, tbl, hash) \ - rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash) /** * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain @@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht); * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ - rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \ tbl, hash, member) /** @@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup( .ht = ht, .key = key, }; - const struct bucket_table *tbl; + struct bucket_table *tbl; struct rhash_head *he; unsigned int hash; @@ -697,8 +727,12 @@ slow_path: } elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_insert(ht, tbl, hash); + data = ERR_PTR(-ENOMEM); + if (!pprev) + goto out; + + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *plist; struct rhlist_head *list; @@ -736,7 +770,7 @@ slow_path: if (unlikely(rht_grow_above_100(ht, tbl))) goto slow_path; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (rhlist) { @@ -746,7 +780,7 @@ slow_path: RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { struct rhlist_head *list; list = container_of(he, struct rhlist_head, rhead); @@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast( spin_lock_bh(lock); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(he, *pprev, tbl, hash) { if (he != obj_old) { pprev = &he->next; continue; diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2d095fc60204..4dff73a89758 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -19,6 +19,30 @@ #include <uapi/linux/uuid.h> /* + * V1 (time-based) UUID definition [RFC 4122]. + * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns + * increments since midnight 15th October 1582 + * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID + * time + * - the clock sequence is a 14-bit counter to avoid duplicate times + */ +struct uuid_v1 { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ +#define UUID_TO_UNIX_TIME 0x01b21dd213814000ULL +#define UUID_TIMEHI_MASK 0x0fff +#define UUID_VERSION_TIME 0x1000 /* time-based UUID */ +#define UUID_VERSION_NAME 0x3000 /* name-based UUID */ +#define UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */ + u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ +#define UUID_CLOCKHI_MASK 0x3f +#define UUID_VARIANT_STD 0x80 + u8 clock_seq_low; /* clock seq low */ + u8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + +/* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. */ diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index d73b849e29a6..b8d637225a07 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -33,10 +33,6 @@ struct napi_struct; extern unsigned int sysctl_net_busy_read __read_mostly; extern unsigned int sysctl_net_busy_poll __read_mostly; -/* return values from ndo_ll_poll */ -#define LL_FLUSH_FAILED -1 -#define LL_FLUSH_BUSY -2 - static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 71b266cd63d4..269fd78bb0ae 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -17,7 +17,13 @@ struct tcf_walker { int register_tcf_proto_ops(struct tcf_proto_ops *ops); int unregister_tcf_proto_ops(struct tcf_proto_ops *ops); +#ifdef CONFIG_NET_CLS void tcf_destroy_chain(struct tcf_proto __rcu **fl); +#else +static inline void tcf_destroy_chain(struct tcf_proto __rcu **fl) +{ +} +#endif static inline unsigned long __cls_set_class(unsigned long *clp, unsigned long cl) @@ -475,6 +481,11 @@ static inline bool tc_flags_valid(u32 flags) return true; } +static inline bool tc_in_hw(u32 flags) +{ + return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false; +} + enum tc_fl_command { TC_CLSFLOWER_REPLACE, TC_CLSFLOWER_DESTROY, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9a81dcef53e..14d82bf16692 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -280,9 +280,7 @@ struct net_device; struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { - unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(struct net *net, int tos, int oif, const xfrm_address_t *saddr, @@ -303,8 +301,8 @@ struct xfrm_policy_afinfo { struct dst_entry *(*blackhole_route)(struct net *net, struct dst_entry *orig); }; -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family); +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo); void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c); void km_state_notify(struct xfrm_state *x, const struct km_event *c); @@ -349,13 +347,12 @@ struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family); struct xfrm_input_afinfo { unsigned int family; - struct module *owner; int (*callback)(struct sk_buff *skb, u8 protocol, int err); }; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo); +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); void xfrm_state_delete_tunnel(struct xfrm_state *x); @@ -501,6 +498,7 @@ struct xfrm_tmpl { }; #define XFRM_MAX_DEPTH 6 +#define XFRM_MAX_OFFLOAD_DEPTH 1 struct xfrm_policy_walk_entry { struct list_head all; @@ -684,6 +682,7 @@ struct xfrm_spi_skb_cb { unsigned int daddroff; unsigned int family; + __be32 seq; }; #define XFRM_SPI_SKB_CB(__skb) ((struct xfrm_spi_skb_cb *)&((__skb)->cb[0])) @@ -976,10 +975,41 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); +struct xfrm_offload { + /* Output sequence number for replay protection on offloading. */ + struct { + __u32 low; + __u32 hi; + } seq; + + __u32 flags; +#define SA_DELETE_REQ 1 +#define CRYPTO_DONE 2 +#define CRYPTO_NEXT_DONE 4 +#define CRYPTO_FALLBACK 8 +#define XFRM_GSO_SEGMENT 16 +#define XFRM_GRO 32 + + __u32 status; +#define CRYPTO_SUCCESS 1 +#define CRYPTO_GENERIC_ERROR 2 +#define CRYPTO_TRANSPORT_AH_AUTH_FAILED 4 +#define CRYPTO_TRANSPORT_ESP_AUTH_FAILED 8 +#define CRYPTO_TUNNEL_AH_AUTH_FAILED 16 +#define CRYPTO_TUNNEL_ESP_AUTH_FAILED 32 +#define CRYPTO_INVALID_PACKET_SYNTAX 64 +#define CRYPTO_INVALID_PROTOCOL 128 + + __u8 proto; +}; + struct sec_path { atomic_t refcnt; int len; + int olen; + struct xfrm_state *xvec[XFRM_MAX_DEPTH]; + struct xfrm_offload ovec[XFRM_MAX_OFFLOAD_DEPTH]; }; static inline int secpath_exists(struct sk_buff *skb) @@ -1009,6 +1039,7 @@ secpath_put(struct sec_path *sp) } struct sec_path *secpath_dup(struct sec_path *src); +int secpath_set(struct sk_buff *skb); static inline void secpath_reset(struct sk_buff *skb) @@ -1170,6 +1201,7 @@ static inline void xfrm_sk_free_policy(struct sock *sk) } void xfrm_garbage_collect(struct net *net); +void xfrm_garbage_collect_deferred(struct net *net); #else @@ -1521,6 +1553,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm4_transport_finish(struct sk_buff *skb, int async); int xfrm4_rcv(struct sk_buff *skb); +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) { @@ -1776,6 +1809,15 @@ static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) +{ + struct sec_path *sp = skb->sp; + + if (!sp || !sp->olen || sp->len != sp->olen) + return NULL; + + return &sp->ovec[sp->olen - 1]; +} #endif static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e07fd5a324e6..0539a0ceef38 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -123,6 +123,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -178,6 +184,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 85ddb74fcd1c..b23c1914a182 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -9,9 +9,8 @@ #include <linux/types.h> #include <linux/socket.h> -#ifndef __KERNEL__ -#include <netinet/in.h> -#endif +#include <linux/in.h> +#include <linux/in6.h> #define IPPROTO_L2TP 115 @@ -31,7 +30,7 @@ struct sockaddr_l2tpip { __u32 l2tp_conn_id; /* Connection ID of tunnel */ /* Pad to size of `struct sockaddr'. */ - unsigned char __pad[sizeof(struct sockaddr) - + unsigned char __pad[__SOCK_SIZE__ - sizeof(__kernel_sa_family_t) - sizeof(__be16) - sizeof(struct in_addr) - sizeof(__u32)]; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 345551e71410..7a69f2a4ca0c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -103,8 +103,10 @@ enum { #define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1) /* tca flags definitions */ -#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) -#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) +#define TCA_CLS_FLAGS_SKIP_HW (1 << 0) /* don't offload filter to HW */ +#define TCA_CLS_FLAGS_SKIP_SW (1 << 1) /* don't use filter in SW */ +#define TCA_CLS_FLAGS_IN_HW (1 << 2) /* filter is offloaded to HW */ +#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */ /* U32 filters */ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 46e8a2e369f9..45184a2ef66c 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -362,8 +362,8 @@ enum v4l2_quantization { /* * The default for R'G'B' quantization is always full range, except * for the BT2020 colorspace. For Y'CbCr the quantization is always - * limited range, except for COLORSPACE_JPEG, SRGB, ADOBERGB, - * XV601 or XV709: those are full range. + * limited range, except for COLORSPACE_JPEG, XV601 or XV709: those + * are full range. */ V4L2_QUANTIZATION_DEFAULT = 0, V4L2_QUANTIZATION_FULL_RANGE = 1, @@ -379,8 +379,7 @@ enum v4l2_quantization { (((is_rgb_or_hsv) && (colsp) == V4L2_COLORSPACE_BT2020) ? \ V4L2_QUANTIZATION_LIM_RANGE : \ (((is_rgb_or_hsv) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \ - (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) || \ - (colsp) == V4L2_COLORSPACE_ADOBERGB || (colsp) == V4L2_COLORSPACE_SRGB ? \ + (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \ V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) enum v4l2_priority { diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index a515f7b007c6..da0f53690295 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -52,6 +52,7 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) e = rcu_dereference_protected(parent->bpf.effective[type], lockdep_is_held(&cgroup_mutex)); rcu_assign_pointer(cgrp->bpf.effective[type], e); + cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; } } @@ -82,30 +83,63 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) * * Must be called with cgroup_mutex held. */ -void __cgroup_bpf_update(struct cgroup *cgrp, - struct cgroup *parent, - struct bpf_prog *prog, - enum bpf_attach_type type) +int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, + struct bpf_prog *prog, enum bpf_attach_type type, + bool new_overridable) { - struct bpf_prog *old_prog, *effective; + struct bpf_prog *old_prog, *effective = NULL; struct cgroup_subsys_state *pos; + bool overridable = true; - old_prog = xchg(cgrp->bpf.prog + type, prog); + if (parent) { + overridable = !parent->bpf.disallow_override[type]; + effective = rcu_dereference_protected(parent->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + } + + if (prog && effective && !overridable) + /* if parent has non-overridable prog attached, disallow + * attaching new programs to descendent cgroup + */ + return -EPERM; + + if (prog && effective && overridable != new_overridable) + /* if parent has overridable prog attached, only + * allow overridable programs in descendent cgroup + */ + return -EPERM; - effective = (!prog && parent) ? - rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)) : - prog; + old_prog = cgrp->bpf.prog[type]; + + if (prog) { + overridable = new_overridable; + effective = prog; + if (old_prog && + cgrp->bpf.disallow_override[type] == new_overridable) + /* disallow attaching non-overridable on top + * of existing overridable in this cgroup + * and vice versa + */ + return -EPERM; + } + + if (!prog && !old_prog) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + + cgrp->bpf.prog[type] = prog; css_for_each_descendant_pre(pos, &cgrp->self) { struct cgroup *desc = container_of(pos, struct cgroup, self); /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) + if (desc->bpf.prog[type] && desc != cgrp) { pos = css_rightmost_descendant(pos); - else + } else { rcu_assign_pointer(desc->bpf.effective[type], effective); + desc->bpf.disallow_override[type] = !overridable; + } } if (prog) @@ -115,6 +149,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } + return 0; } /** diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 08a4d287226b..f74ca17af64a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -935,13 +935,14 @@ static int bpf_obj_get(const union bpf_attr *attr) #ifdef CONFIG_CGROUP_BPF -#define BPF_PROG_ATTACH_LAST_FIELD attach_type +#define BPF_PROG_ATTACH_LAST_FIELD attach_flags static int bpf_prog_attach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; struct bpf_prog *prog; struct cgroup *cgrp; - enum bpf_prog_type ptype; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -949,6 +950,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; + if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + return -EINVAL; + switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: @@ -971,10 +975,13 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - cgroup_bpf_update(cgrp, prog, attr->attach_type); + ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, + attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + if (ret) + bpf_prog_put(prog); cgroup_put(cgrp); - return 0; + return ret; } #define BPF_PROG_DETACH_LAST_FIELD attach_type @@ -982,6 +989,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { struct cgroup *cgrp; + int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -997,7 +1005,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - cgroup_bpf_update(cgrp, NULL, attr->attach_type); + ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); cgroup_put(cgrp); break; @@ -1005,7 +1013,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) return -EINVAL; } - return 0; + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1a754e5d2695..d2bded2b250c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -701,6 +701,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, /* dst_input() and dst_output() can't write for now */ if (t == BPF_WRITE) return false; + /* fallthrough */ case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: @@ -2007,6 +2008,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JGT: /* Unsigned comparison, the minimum value is 0. */ false_reg->min_value = 0; + /* fallthrough */ case BPF_JSGT: /* If this is false then we know the maximum val is val, * otherwise we know the min val is val+1. @@ -2017,6 +2019,7 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, case BPF_JGE: /* Unsigned comparison, the minimum value is 0. */ false_reg->min_value = 0; + /* fallthrough */ case BPF_JSGE: /* If this is false then we know the maximum value is val - 1, * otherwise we know the mimimum value is val. @@ -2055,6 +2058,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, case BPF_JGT: /* Unsigned comparison, the minimum value is 0. */ true_reg->min_value = 0; + /* fallthrough */ case BPF_JSGT: /* * If this is false, then the val is <= the register, if it is @@ -2066,6 +2070,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, case BPF_JGE: /* Unsigned comparison, the minimum value is 0. */ true_reg->min_value = 0; + /* fallthrough */ case BPF_JSGE: /* If this is false then constant < register, if it is true then * the register < constant. diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 688dd02af985..53bbca7c4859 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6498,15 +6498,16 @@ static __init int cgroup_namespaces_init(void) subsys_initcall(cgroup_namespaces_init); #ifdef CONFIG_CGROUP_BPF -void cgroup_bpf_update(struct cgroup *cgrp, - struct bpf_prog *prog, - enum bpf_attach_type type) +int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, bool overridable) { struct cgroup *parent = cgroup_parent(cgrp); + int ret; mutex_lock(&cgroup_mutex); - __cgroup_bpf_update(cgrp, parent, prog, type); + ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); mutex_unlock(&cgroup_mutex); + return ret; } #endif /* CONFIG_CGROUP_BPF */ diff --git a/kernel/events/core.c b/kernel/events/core.c index e5aaa806702d..e235bb991bdd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3487,14 +3487,15 @@ struct perf_read_data { int ret; }; -static int find_cpu_to_read(struct perf_event *event, int local_cpu) +static int __perf_event_read_cpu(struct perf_event *event, int event_cpu) { - int event_cpu = event->oncpu; u16 local_pkg, event_pkg; if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) { - event_pkg = topology_physical_package_id(event_cpu); - local_pkg = topology_physical_package_id(local_cpu); + int local_cpu = smp_processor_id(); + + event_pkg = topology_physical_package_id(event_cpu); + local_pkg = topology_physical_package_id(local_cpu); if (event_pkg == local_pkg) return local_cpu; @@ -3624,7 +3625,7 @@ u64 perf_event_read_local(struct perf_event *event) static int perf_event_read(struct perf_event *event, bool group) { - int ret = 0, cpu_to_read, local_cpu; + int event_cpu, ret = 0; /* * If event is enabled and currently active on a CPU, update the @@ -3637,21 +3638,25 @@ static int perf_event_read(struct perf_event *event, bool group) .ret = 0, }; - local_cpu = get_cpu(); - cpu_to_read = find_cpu_to_read(event, local_cpu); - put_cpu(); + event_cpu = READ_ONCE(event->oncpu); + if ((unsigned)event_cpu >= nr_cpu_ids) + return 0; + + preempt_disable(); + event_cpu = __perf_event_read_cpu(event, event_cpu); /* * Purposely ignore the smp_call_function_single() return * value. * - * If event->oncpu isn't a valid CPU it means the event got + * If event_cpu isn't a valid CPU it means the event got * scheduled out and that will have updated the event count. * * Therefore, either way, we'll have an up-to-date event count * after this. */ - (void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1); + (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1); + preempt_enable(); ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index b6e4c16377c7..9c15a9124e83 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -18,10 +18,8 @@ void print_stack_trace(struct stack_trace *trace, int spaces) if (WARN_ON(!trace->entries)) return; - for (i = 0; i < trace->nr_entries; i++) { - printk("%*c", 1 + spaces, ' '); - print_ip_sym(trace->entries[i]); - } + for (i = 0; i < trace->nr_entries; i++) + printk("%*c%pS\n", 1 + spaces, ' ', (void *)trace->entries[i]); } EXPORT_SYMBOL_GPL(print_stack_trace); @@ -29,7 +27,6 @@ int snprint_stack_trace(char *buf, size_t size, struct stack_trace *trace, int spaces) { int i; - unsigned long ip; int generated; int total = 0; @@ -37,9 +34,8 @@ int snprint_stack_trace(char *buf, size_t size, return 0; for (i = 0; i < trace->nr_entries; i++) { - ip = trace->entries[i]; - generated = snprintf(buf, size, "%*c[<%p>] %pS\n", - 1 + spaces, ' ', (void *) ip, (void *) ip); + generated = snprintf(buf, size, "%*c%pS\n", 1 + spaces, ' ', + (void *)trace->entries[i]); total += generated; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 74e0388cc88d..fc6f740d0277 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -725,6 +725,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, */ if (delta == 0) { tick_nohz_restart(ts, now); + /* + * Make sure next tick stop doesn't get fooled by past + * clock deadline + */ + ts->next_tick = 0; goto out; } } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 32d0ad058380..172454e6b979 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -32,6 +32,11 @@ #define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 32UL +union nested_table { + union nested_table __rcu *table; + struct rhash_head __rcu *bucket; +}; + static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) @@ -76,6 +81,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, /* Never allocate more than 0.5 locks per bucket */ size = min_t(unsigned int, size, tbl->size >> 1); + if (tbl->nest) + size = min(size, 1U << tbl->nest); + if (sizeof(spinlock_t) != 0) { tbl->locks = NULL; #ifdef CONFIG_NUMA @@ -99,8 +107,45 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, return 0; } +static void nested_table_free(union nested_table *ntbl, unsigned int size) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + const unsigned int len = 1 << shift; + unsigned int i; + + ntbl = rcu_dereference_raw(ntbl->table); + if (!ntbl) + return; + + if (size > len) { + size >>= shift; + for (i = 0; i < len; i++) + nested_table_free(ntbl + i, size); + } + + kfree(ntbl); +} + +static void nested_bucket_table_free(const struct bucket_table *tbl) +{ + unsigned int size = tbl->size >> tbl->nest; + unsigned int len = 1 << tbl->nest; + union nested_table *ntbl; + unsigned int i; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + + for (i = 0; i < len; i++) + nested_table_free(ntbl + i, size); + + kfree(ntbl); +} + static void bucket_table_free(const struct bucket_table *tbl) { + if (tbl->nest) + nested_bucket_table_free(tbl); + if (tbl) kvfree(tbl->locks); @@ -112,6 +157,59 @@ static void bucket_table_free_rcu(struct rcu_head *head) bucket_table_free(container_of(head, struct bucket_table, rcu)); } +static union nested_table *nested_table_alloc(struct rhashtable *ht, + union nested_table __rcu **prev, + unsigned int shifted, + unsigned int nhash) +{ + union nested_table *ntbl; + int i; + + ntbl = rcu_dereference(*prev); + if (ntbl) + return ntbl; + + ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); + + if (ntbl && shifted) { + for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) + INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, + (i << shifted) | nhash); + } + + rcu_assign_pointer(*prev, ntbl); + + return ntbl; +} + +static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets, + gfp_t gfp) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + struct bucket_table *tbl; + size_t size; + + if (nbuckets < (1 << (shift + 1))) + return NULL; + + size = sizeof(*tbl) + sizeof(tbl->buckets[0]); + + tbl = kzalloc(size, gfp); + if (!tbl) + return NULL; + + if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, + 0, 0)) { + kfree(tbl); + return NULL; + } + + tbl->nest = (ilog2(nbuckets) - 1) % shift + 1; + + return tbl; +} + static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets, gfp_t gfp) @@ -126,10 +224,17 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); + + size = nbuckets; + + if (tbl == NULL && gfp != GFP_KERNEL) { + tbl = nested_bucket_table_alloc(ht, nbuckets, gfp); + nbuckets = 0; + } if (tbl == NULL) return NULL; - tbl->size = nbuckets; + tbl->size = size; if (alloc_bucket_locks(ht, tbl, gfp) < 0) { bucket_table_free(tbl); @@ -164,12 +269,17 @@ static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *new_tbl = rhashtable_last_table(ht, rht_dereference_rcu(old_tbl->future_tbl, ht)); - struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; - int err = -ENOENT; + struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); + int err = -EAGAIN; struct rhash_head *head, *next, *entry; spinlock_t *new_bucket_lock; unsigned int new_hash; + if (new_tbl->nest) + goto out; + + err = -ENOENT; + rht_for_each(entry, old_tbl, old_hash) { err = 0; next = rht_dereference_bucket(entry->next, old_tbl, old_hash); @@ -202,19 +312,26 @@ out: return err; } -static void rhashtable_rehash_chain(struct rhashtable *ht, +static int rhashtable_rehash_chain(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); spinlock_t *old_bucket_lock; + int err; old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); spin_lock_bh(old_bucket_lock); - while (!rhashtable_rehash_one(ht, old_hash)) + while (!(err = rhashtable_rehash_one(ht, old_hash))) ; - old_tbl->rehash++; + + if (err == -ENOENT) { + old_tbl->rehash++; + err = 0; + } spin_unlock_bh(old_bucket_lock); + + return err; } static int rhashtable_rehash_attach(struct rhashtable *ht, @@ -246,13 +363,17 @@ static int rhashtable_rehash_table(struct rhashtable *ht) struct bucket_table *new_tbl; struct rhashtable_walker *walker; unsigned int old_hash; + int err; new_tbl = rht_dereference(old_tbl->future_tbl, ht); if (!new_tbl) return 0; - for (old_hash = 0; old_hash < old_tbl->size; old_hash++) - rhashtable_rehash_chain(ht, old_hash); + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + err = rhashtable_rehash_chain(ht, old_hash); + if (err) + return err; + } /* Publish the new table pointer. */ rcu_assign_pointer(ht->tbl, new_tbl); @@ -271,31 +392,16 @@ static int rhashtable_rehash_table(struct rhashtable *ht) return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } -/** - * rhashtable_expand - Expand hash table while allowing concurrent lookups - * @ht: the hash table to expand - * - * A secondary bucket array is allocated and the hash entries are migrated. - * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. - * - * The caller must ensure that no concurrent resizing occurs by holding - * ht->mutex. - * - * It is valid to have concurrent insertions and deletions protected by per - * bucket locks or concurrent RCU protected lookups and traversals. - */ -static int rhashtable_expand(struct rhashtable *ht) +static int rhashtable_rehash_alloc(struct rhashtable *ht, + struct bucket_table *old_tbl, + unsigned int size) { - struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; int err; ASSERT_RHT_MUTEX(ht); - old_tbl = rhashtable_last_table(ht, old_tbl); - - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; @@ -324,12 +430,9 @@ static int rhashtable_expand(struct rhashtable *ht) */ static int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); unsigned int nelems = atomic_read(&ht->nelems); unsigned int size = 0; - int err; - - ASSERT_RHT_MUTEX(ht); if (nelems) size = roundup_pow_of_two(nelems * 3 / 2); @@ -342,15 +445,7 @@ static int rhashtable_shrink(struct rhashtable *ht) if (rht_dereference(old_tbl->future_tbl, ht)) return -EEXIST; - new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); - if (new_tbl == NULL) - return -ENOMEM; - - err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); - if (err) - bucket_table_free(new_tbl); - - return err; + return rhashtable_rehash_alloc(ht, old_tbl, size); } static void rht_deferred_worker(struct work_struct *work) @@ -366,11 +461,14 @@ static void rht_deferred_worker(struct work_struct *work) tbl = rhashtable_last_table(ht, tbl); if (rht_grow_above_75(ht, tbl)) - rhashtable_expand(ht); + err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2); else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) - rhashtable_shrink(ht); + err = rhashtable_shrink(ht); + else if (tbl->nest) + err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - err = rhashtable_rehash_table(ht); + if (!err) + err = rhashtable_rehash_table(ht); mutex_unlock(&ht->mutex); @@ -439,8 +537,8 @@ static void *rhashtable_lookup_one(struct rhashtable *ht, int elasticity; elasticity = ht->elasticity; - pprev = &tbl->buckets[hash]; - rht_for_each(head, tbl, hash) { + pprev = rht_bucket_var(tbl, hash); + rht_for_each_continue(head, *pprev, tbl, hash) { struct rhlist_head *list; struct rhlist_head *plist; @@ -477,6 +575,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, struct rhash_head *obj, void *data) { + struct rhash_head __rcu **pprev; struct bucket_table *new_tbl; struct rhash_head *head; @@ -499,7 +598,11 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (unlikely(rht_grow_above_100(ht, tbl))) return ERR_PTR(-EAGAIN); - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + pprev = rht_bucket_insert(ht, tbl, hash); + if (!pprev) + return ERR_PTR(-ENOMEM); + + head = rht_dereference_bucket(*pprev, tbl, hash); RCU_INIT_POINTER(obj->next, head); if (ht->rhlist) { @@ -509,7 +612,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, RCU_INIT_POINTER(list->next, NULL); } - rcu_assign_pointer(tbl->buckets[hash], obj); + rcu_assign_pointer(*pprev, obj); atomic_inc(&ht->nelems); if (rht_grow_above_75(ht, tbl)) @@ -975,7 +1078,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - const struct bucket_table *tbl; + struct bucket_table *tbl; unsigned int i; cancel_work_sync(&ht->run_work); @@ -986,7 +1089,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; - for (pos = rht_dereference(tbl->buckets[i], ht), + for (pos = rht_dereference(*rht_bucket(tbl, i), ht), next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL; !rht_is_a_nulls(pos); @@ -1007,3 +1110,70 @@ void rhashtable_destroy(struct rhashtable *ht) return rhashtable_free_and_destroy(ht, NULL, NULL); } EXPORT_SYMBOL_GPL(rhashtable_destroy); + +struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl, + unsigned int hash) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + static struct rhash_head __rcu *rhnull = + (struct rhash_head __rcu *)NULLS_MARKER(0); + unsigned int index = hash & ((1 << tbl->nest) - 1); + unsigned int size = tbl->size >> tbl->nest; + unsigned int subhash = hash; + union nested_table *ntbl; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); + subhash >>= tbl->nest; + + while (ntbl && size > (1 << shift)) { + index = subhash & ((1 << shift) - 1); + ntbl = rht_dereference_bucket(ntbl[index].table, tbl, hash); + size >>= shift; + subhash >>= shift; + } + + if (!ntbl) + return &rhnull; + + return &ntbl[subhash].bucket; + +} +EXPORT_SYMBOL_GPL(rht_bucket_nested); + +struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) +{ + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + unsigned int index = hash & ((1 << tbl->nest) - 1); + unsigned int size = tbl->size >> tbl->nest; + union nested_table *ntbl; + unsigned int shifted; + unsigned int nhash; + + ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); + hash >>= tbl->nest; + nhash = index; + shifted = tbl->nest; + ntbl = nested_table_alloc(ht, &ntbl[index].table, + size <= (1 << shift) ? shifted : 0, nhash); + + while (ntbl && size > (1 << shift)) { + index = hash & ((1 << shift) - 1); + size >>= shift; + hash >>= shift; + nhash |= index << shifted; + shifted += shift; + ntbl = nested_table_alloc(ht, &ntbl[index].table, + size <= (1 << shift) ? shifted : 0, + nhash); + } + + if (!ntbl) + return NULL; + + return &ntbl[hash].bucket; + +} +EXPORT_SYMBOL_GPL(rht_bucket_nested_insert); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d208ee9ab60a..ea71513fca21 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -79,7 +79,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_flood(mdst, skb, false, true); else br_flood(br, skb, BR_PKT_MULTICAST, false, true); - } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) { + } else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) { br_forward(dst->dst, skb, false, true); } else { br_flood(br, skb, BR_PKT_UNICAST, false, true); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 5693168e88b6..4ac11574117c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -28,9 +28,6 @@ #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid); static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid); static void fdb_notify(struct net_bridge *br, @@ -86,6 +83,47 @@ static void fdb_rcu_free(struct rcu_head *head) kmem_cache_free(br_fdb_cache, ent); } +static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, + const unsigned char *addr, + __u16 vid) +{ + struct net_bridge_fdb_entry *f; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + hlist_for_each_entry_rcu(f, head, hlist) + if (ether_addr_equal(f->addr.addr, addr) && f->vlan_id == vid) + break; + + return f; +} + +/* requires bridge hash_lock */ +static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *fdb; + + WARN_ON_ONCE(!br_hash_lock_held(br)); + + rcu_read_lock(); + fdb = fdb_find_rcu(head, addr, vid); + rcu_read_unlock(); + + return fdb; +} + +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + + return fdb_find_rcu(head, addr, vid); +} + /* When a static FDB entry is added, the mac address from the entry is * added to the bridge private HW address list and all required ports * are then updated with the new information. @@ -198,11 +236,10 @@ void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *f; spin_lock_bh(&br->hash_lock); - f = fdb_find(head, addr, vid); + f = br_fdb_find(br, addr, vid); if (f && f->is_local && !f->added_by_user && f->dst == p) fdb_delete_local(br, p, f); spin_unlock_bh(&br->hash_lock); @@ -266,7 +303,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) spin_lock_bh(&br->hash_lock); /* If old entry was unassociated with any port, then delete it. */ - f = __br_fdb_get(br, br->dev->dev_addr, 0); + f = br_fdb_find(br, br->dev->dev_addr, 0); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); @@ -281,7 +318,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - f = __br_fdb_get(br, br->dev->dev_addr, v->vid); + f = br_fdb_find(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, v->vid); @@ -380,24 +417,6 @@ void br_fdb_delete_by_port(struct net_bridge *br, spin_unlock_bh(&br->hash_lock); } -/* No locking or refcounting, assumes caller has rcu_read_lock */ -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, - &br->hash[br_mac_hash(addr, vid)], hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) { - return fdb; - } - } - - return NULL; -} - #if IS_ENABLED(CONFIG_ATM_LANE) /* Interface used by ATM LANE hook to test * if an addr is on some other bridge port */ @@ -412,7 +431,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) if (!port) ret = 0; else { - fdb = __br_fdb_get(port->br, addr, 0); + fdb = br_fdb_find_rcu(port->br, addr, 0); ret = fdb && fdb->dst && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; } @@ -474,34 +493,6 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, return num; } -static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - -static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, - const unsigned char *addr, - __u16 vid) -{ - struct net_bridge_fdb_entry *fdb; - - hlist_for_each_entry_rcu(fdb, head, hlist) { - if (ether_addr_equal(fdb->addr.addr, addr) && - fdb->vlan_id == vid) - return fdb; - } - return NULL; -} - static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, @@ -535,7 +526,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, if (!is_valid_ether_addr(addr)) return -EINVAL; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. @@ -608,7 +599,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } else { spin_lock(&br->hash_lock); - if (likely(!fdb_find(head, addr, vid))) { + if (likely(!fdb_find_rcu(head, addr, vid))) { fdb = fdb_create(head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) @@ -792,7 +783,7 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, return -EINVAL; } - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; @@ -939,55 +930,30 @@ out: return err; } -static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, - u16 vid) -{ - struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; - struct net_bridge_fdb_entry *fdb; - - fdb = fdb_find(head, addr, vid); - if (!fdb) - return -ENOENT; - - fdb_delete(br, fdb); - return 0; -} - -static int __br_fdb_delete_by_addr(struct net_bridge *br, - const unsigned char *addr, u16 vid) -{ - int err; - - spin_lock_bh(&br->hash_lock); - err = fdb_delete_by_addr(br, addr, vid); - spin_unlock_bh(&br->hash_lock); - - return err; -} - -static int fdb_delete_by_addr_and_port(struct net_bridge_port *p, +static int fdb_delete_by_addr_and_port(struct net_bridge *br, + const struct net_bridge_port *p, const u8 *addr, u16 vlan) { - struct net_bridge *br = p->br; - struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)]; struct net_bridge_fdb_entry *fdb; - fdb = fdb_find(head, addr, vlan); + fdb = br_fdb_find(br, addr, vlan); if (!fdb || fdb->dst != p) return -ENOENT; fdb_delete(br, fdb); + return 0; } -static int __br_fdb_delete(struct net_bridge_port *p, +static int __br_fdb_delete(struct net_bridge *br, + const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { int err; - spin_lock_bh(&p->br->hash_lock); - err = fdb_delete_by_addr_and_port(p, addr, vid); - spin_unlock_bh(&p->br->hash_lock); + spin_lock_bh(&br->hash_lock); + err = fdb_delete_by_addr_and_port(br, p, addr, vid); + spin_unlock_bh(&br->hash_lock); return err; } @@ -1000,7 +966,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge_vlan *v; - struct net_bridge *br = NULL; + struct net_bridge *br; int err; if (dev->priv_flags & IFF_EBRIDGE) { @@ -1014,6 +980,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } vg = nbp_vlan_group(p); + br = p->br; } if (vid) { @@ -1023,30 +990,20 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, vid); - else - err = __br_fdb_delete(p, addr, vid); + err = __br_fdb_delete(br, p, addr, vid); } else { err = -ENOENT; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, 0); - else - err &= __br_fdb_delete(p, addr, 0); - + err &= __br_fdb_delete(br, p, addr, 0); if (!vg || !vg->num_vlans) - goto out; + return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - if (dev->priv_flags & IFF_EBRIDGE) - err = __br_fdb_delete_by_addr(br, addr, v->vid); - else - err &= __br_fdb_delete(p, addr, v->vid); + err &= __br_fdb_delete(br, p, addr, v->vid); } } -out: + return err; } @@ -1117,7 +1074,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (!fdb) { fdb = fdb_create(head, p, addr, vid, 0, 0); if (!fdb) { @@ -1145,15 +1102,13 @@ err_unlock: int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); - head = &br->hash[br_mac_hash(addr, vid)]; - fdb = fdb_find(head, addr, vid); + fdb = br_fdb_find(br, addr, vid); if (fdb && fdb->added_by_external_learn) fdb_delete(br, fdb); else diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 4615a9b3e26c..236f34244dbe 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -114,7 +114,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, return; } - f = __br_fdb_get(br, n->ha, vid); + f = br_fdb_find_rcu(br, n->ha, vid); if (f && ((p->flags & BR_PROXYARP) || (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, @@ -189,7 +189,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } break; case BR_PKT_UNICAST: - dst = __br_fdb_get(br, dest, vid); + dst = br_fdb_find_rcu(br, dest, vid); default: break; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 1cbbf63a5ef7..2288fca7756c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -507,8 +507,9 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr); void br_fdb_cleanup(struct work_struct *work); void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all); -struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, - const unsigned char *addr, __u16 vid); +struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, + const unsigned char *addr, + __u16 vid); int br_fdb_test_addr(struct net_device *dev, unsigned char *addr); int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count, unsigned long off); @@ -530,6 +531,15 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); +static inline bool br_hash_lock_held(struct net_bridge *br) +{ +#ifdef CONFIG_LOCKDEP + return lockdep_is_held(&br->hash_lock); +#else + return true; +#endif +} + /* br_forward.c */ enum br_pkt_type { BR_PKT_UNICAST, diff --git a/net/core/dev.c b/net/core/dev.c index 363c44b9be63..05d19c6acf94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4510,6 +4510,11 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (&ptype->list == head) goto normal; + if (IS_ERR(pp) && PTR_ERR(pp) == -EINPROGRESS) { + ret = GRO_CONSUMED; + goto ok; + } + same_flow = NAPI_GRO_CB(skb)->same_flow; ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; @@ -4614,6 +4619,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) case GRO_HELD: case GRO_MERGED: + case GRO_CONSUMED: break; } @@ -4685,6 +4691,7 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, break; case GRO_MERGED: + case GRO_CONSUMED: break; } @@ -5008,9 +5015,6 @@ count: LINUX_MIB_BUSYPOLLRXPACKETS, rc); local_bh_enable(); - if (rc == LL_FLUSH_FAILED) - break; /* permanent failure */ - if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) || busy_loop_timeout(end_time)) break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7bb12e07ffef..e7c12caa20c8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2923,7 +2923,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); - call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); + if (index == NEIGH_VAR_DELAY_PROBE_TIME) + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d21be3b3d9cc..1446810047f5 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -475,7 +475,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e30f9caddae8..91a2557942fa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -361,6 +361,19 @@ config INET_ESP If unsure, say Y. +config INET_ESP_OFFLOAD + tristate "IP: ESP transformation offload" + depends on INET_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET_IPCOMP tristate "IP: IPComp transformation" select INET_XFRM_TUNNEL diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 48af58a5686e..c6d4238ff94a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_NET_IPVTI) += ip_vti.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o obj-$(CONFIG_INET_ESP) += esp4.o +obj-$(CONFIG_INET_ESP_OFFLOAD) += esp4_offload.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 685ba53df2d1..602d40f43687 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1423,7 +1423,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 89a8cac4726a..51b27ae09fbd 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1263,7 +1263,7 @@ void __init arp_init(void) /* * ax25 -> ASCII conversion */ -static char *ax2asc2(ax25_address *a, char *buf) +static void ax2asc2(ax25_address *a, char *buf) { char c, *s; int n; @@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf) *s++ = n + '0'; *s++ = '\0'; - if (*buf == '\0' || *buf == '-') - return "*"; - - return buf; + if (*buf == '\0' || *buf == '-') { + buf[0] = '*'; + buf[1] = '\0'; + } } #endif /* CONFIG_AX25 */ @@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, } #endif sprintf(tbuf, "%pI4", n->primary_key); - seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n", + seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n", tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name); read_unlock(&n->lock); } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c new file mode 100644 index 000000000000..1de442632406 --- /dev/null +++ b/net/ipv4/esp4_offload.c @@ -0,0 +1,106 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * ESP GRO support + */ + +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <crypto/aead.h> +#include <crypto/authenc.h> +#include <linux/err.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include <linux/scatterlist.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/udp.h> + +static struct sk_buff **esp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp4_offload = { + .callbacks = { + .gro_receive = esp4_gro_receive, + }, +}; + +static int __init esp4_offload_init(void) +{ + return inet_add_offload(&esp4_offload, IPPROTO_ESP); +} + +static void __exit esp4_offload_exit(void) +{ + inet_del_offload(&esp4_offload, IPPROTO_ESP); +} + +module_init(esp4_offload_init); +module_exit(esp4_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..3d063eb37848 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, (fwmark > 0 && skb->mark == fwmark)) && (full || tp->snd_cwnd != tcp_probe.lastcwnd)) { - spin_lock(&tcp_probe.lock); + spin_lock_bh(&tcp_probe.lock); /* If log fills, just silently drop */ if (tcp_probe_avail() > 1) { struct tcp_log *p = tcp_probe.log + tcp_probe.head; @@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1); } tcp_probe.lastcwnd = tp->snd_cwnd; - spin_unlock(&tcp_probe.lock); + spin_unlock_bh(&tcp_probe.lock); wake_up(&tcp_probe.wait); } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 62e1e72db461..1fc684111ce6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -40,6 +40,7 @@ drop: int xfrm4_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); struct iphdr *iph = ip_hdr(skb); iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; @@ -53,6 +54,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return 0; + } + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index fd840c7d75ea..4acc0508c5eb 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -43,6 +43,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -50,7 +51,8 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 6a7ff6957535..71b4ecc195c7 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,8 +17,6 @@ #include <net/ip.h> #include <net/l3mdev.h> -static struct xfrm_policy_afinfo xfrm4_policy_afinfo; - static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, @@ -219,7 +217,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - xfrm4_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } @@ -271,8 +269,7 @@ static struct dst_ops xfrm4_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { - .family = AF_INET, +static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, @@ -376,7 +373,7 @@ static struct pernet_operations __net_initdata xfrm4_net_ops = { static void __init xfrm4_policy_init(void) { - xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); + xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET); } void __init xfrm4_init(void) diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index dccefa9d84cf..8dd0e6ab8606 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -188,9 +188,8 @@ static const struct net_protocol ipcomp4_protocol = { .netns_ok = 1, }; -static struct xfrm_input_afinfo xfrm4_input_afinfo = { +static const struct xfrm_input_afinfo xfrm4_input_afinfo = { .family = AF_INET, - .owner = THIS_MODULE, .callback = xfrm4_rcv_cb, }; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 3c7c76b2a7ba..e2afe677a9d9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -75,6 +75,19 @@ config INET6_ESP If unsure, say Y. +config INET6_ESP_OFFLOAD + tristate "IPv6: ESP transformation offload" + depends on INET6_ESP + select XFRM_OFFLOAD + default n + ---help--- + Support for ESP transformation offload. This makes sense + only if this system really does IPsec and want to do it + with high throughput. A typical desktop system does not + need it, even if it does IPsec. + + If unsure, say N. + config INET6_IPCOMP tristate "IPv6: IPComp transformation" select INET6_XFRM_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index a9e9fec387ce..217e9ff0e24b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -30,6 +30,7 @@ ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o obj-$(CONFIG_INET6_ESP) += esp6.o +obj-$(CONFIG_INET6_ESP_OFFLOAD) += esp6_offload.o obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a3eaafd87100..eec27f87efac 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,18 +167,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (np->sndflow) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; - addr_type = ipv6_addr_type(&usin->sin6_addr); - - if (addr_type == IPV6_ADDR_ANY) { + if (ipv6_addr_any(&usin->sin6_addr)) { /* * connect to self */ - usin->sin6_addr.s6_addr[15] = 0x01; + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; } + addr_type = ipv6_addr_type(&usin->sin6_addr); + daddr = &usin->sin6_addr; - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { struct sockaddr_in sin; if (__ipv6_only_sock(sk)) { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c new file mode 100644 index 000000000000..d914eb93204a --- /dev/null +++ b/net/ipv6/esp6_offload.c @@ -0,0 +1,108 @@ +/* + * IPV6 GSO/GRO offload support + * Linux INET implementation + * + * Copyright (C) 2016 secunet Security Networks AG + * Author: Steffen Klassert <steffen.klassert@secunet.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * ESP GRO support + */ + +#include <linux/skbuff.h> +#include <linux/init.h> +#include <net/protocol.h> +#include <crypto/aead.h> +#include <crypto/authenc.h> +#include <linux/err.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/xfrm.h> +#include <net/esp.h> +#include <linux/scatterlist.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <net/ip6_route.h> +#include <net/ipv6.h> +#include <linux/icmpv6.h> + +static struct sk_buff **esp6_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + struct xfrm_offload *xo; + struct xfrm_state *x; + __be32 seq; + __be32 spi; + int err; + + skb_pull(skb, offset); + + if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) + goto out; + + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } + xo->flags |= XFRM_GRO; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_SPI_SKB_CB(skb)->family = AF_INET6; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); + XFRM_SPI_SKB_CB(skb)->seq = seq; + + /* We don't need to handle errors from xfrm_input, it does all + * the error handling and frees the resources on error. */ + xfrm_input(skb, IPPROTO_ESP, spi, -2); + + return ERR_PTR(-EINPROGRESS); +out: + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + +static const struct net_offload esp6_offload = { + .callbacks = { + .gro_receive = esp6_gro_receive, + }, +}; + +static int __init esp6_offload_init(void) +{ + return inet6_add_offload(&esp6_offload, IPPROTO_ESP); +} + +static void __exit esp6_offload_exit(void) +{ + inet6_del_offload(&esp6_offload, IPPROTO_ESP); +} + +module_init(esp6_offload_init); +module_exit(esp6_offload_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index fc7b4017ba24..0838e6d01d2e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -253,7 +253,7 @@ out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a75871c62328..d0f51b420447 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1022,6 +1022,9 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } } #endif + if (ipv6_addr_v4mapped(&fl6->saddr) && + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) + return -EAFNOSUPPORT; return 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b5d27212db2f..21c719965b6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -149,8 +149,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if (ipv6_addr_any(&usin->sin6_addr)) - usin->sin6_addr.s6_addr[15] = 0x1; + if (ipv6_addr_any(&usin->sin6_addr)) { + if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + &usin->sin6_addr); + else + usin->sin6_addr = in6addr_loopback; + } addr_type = ipv6_addr_type(&usin->sin6_addr); @@ -189,7 +194,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * TCP over IPv4 */ - if (addr_type == IPV6_ADDR_MAPPED) { + if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index df71ba05f41d..4e4c401e3bc6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1046,6 +1046,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; daddr = &sin6->sin6_addr; + if (ipv6_addr_any(daddr) && + ipv6_addr_v4mapped(&np->saddr)) + ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), + daddr); break; case AF_INET: goto do_udp_sendmsg; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b5789562aded..08a807b29298 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -33,6 +33,8 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { + struct xfrm_offload *xo = xfrm_offload(skb); + skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; @@ -44,6 +46,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); + if (xo && (xo->flags & XFRM_GRO)) { + skb_mac_header_rebuild(skb); + return -1; + } + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); @@ -69,18 +76,9 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, struct xfrm_state *x = NULL; int i = 0; - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + if (secpath_set(skb)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 4e344105b3fd..4439ee44c8b0 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -47,6 +47,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { int ihl = skb->data - skb_transport_header(skb); + struct xfrm_offload *xo = xfrm_offload(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), @@ -55,7 +56,8 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - sizeof(struct ipv6hdr)); - skb_reset_transport_header(skb); + if (!xo || !(xo->flags & XFRM_GRO)) + skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index e0f71c01d728..79651bc71bf0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -25,8 +25,6 @@ #include <net/mip6.h> #endif -static struct xfrm_policy_afinfo xfrm6_policy_afinfo; - static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr) @@ -220,7 +218,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) { struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - xfrm6_policy_afinfo.garbage_collect(net); + xfrm_garbage_collect_deferred(net); return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } @@ -291,8 +289,7 @@ static struct dst_ops xfrm6_dst_ops_template = { .gc_thresh = INT_MAX, }; -static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { - .family = AF_INET6, +static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, @@ -305,7 +302,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { static int __init xfrm6_policy_init(void) { - return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo); + return xfrm_policy_register_afinfo(&xfrm6_policy_afinfo, AF_INET6); } static void xfrm6_policy_fini(void) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 54d13f8dbbae..b2dc8ce49378 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -162,9 +162,8 @@ static const struct inet6_protocol ipcomp6_protocol = { .flags = INET6_PROTO_NOPOLICY, }; -static struct xfrm_input_afinfo xfrm6_input_afinfo = { +static const struct xfrm_input_afinfo xfrm6_input_afinfo = { .family = AF_INET6, - .owner = THIS_MODULE, .callback = xfrm6_rcv_cb, }; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 64f0e8531af0..a646f3481240 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1044,8 +1044,10 @@ wait_for_memory: } else { /* Message not complete, save state */ partial_message: - kcm->seq_skb = head; - kcm_tx_msg(head)->last_skb = skb; + if (head) { + kcm->seq_skb = head; + kcm_tx_msg(head)->last_skb = skb; + } } KCM_STATS_ADD(kcm->stats.tx_bytes, copied); diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 3e821daf9dd4..8bc5a1bd2d45 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb) * another trick required to cope with how the PROCOM state * machine works. -acme */ + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; } if (!sock_owned_by_user(sk)) llc_conn_rcv(sk, skb); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d0e1e804ebd7..5404d0d195cc 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb_orphan(skb); + sock_hold(sk); skb->sk = sk; + skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 09141a18ee2d..89193a634da4 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -149,6 +149,8 @@ static void do_setup(struct net_device *netdev) { ether_setup(netdev); + netdev->max_mtu = ETH_MAX_MTU; + netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a984f6f9ab4e..7098e2a457e4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1627,6 +1627,7 @@ static void fanout_release_data(struct packet_fanout *f) static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { + struct packet_rollover *rollover = NULL; struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; @@ -1649,23 +1650,28 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } + mutex_lock(&fanout_mutex); + + err = -EINVAL; if (!po->running) - return -EINVAL; + goto out; + err = -EALREADY; if (po->fanout) - return -EALREADY; + goto out; if (type == PACKET_FANOUT_ROLLOVER || (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { - po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); - if (!po->rollover) - return -ENOMEM; - atomic_long_set(&po->rollover->num, 0); - atomic_long_set(&po->rollover->num_huge, 0); - atomic_long_set(&po->rollover->num_failed, 0); + err = -ENOMEM; + rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); + if (!rollover) + goto out; + atomic_long_set(&rollover->num, 0); + atomic_long_set(&rollover->num_huge, 0); + atomic_long_set(&rollover->num_failed, 0); + po->rollover = rollover; } - mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1712,11 +1718,11 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } out: - mutex_unlock(&fanout_mutex); - if (err) { - kfree(po->rollover); + if (err && rollover) { + kfree(rollover); po->rollover = NULL; } + mutex_unlock(&fanout_mutex); return err; } @@ -1725,23 +1731,22 @@ static void fanout_release(struct sock *sk) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; - f = po->fanout; - if (!f) - return; - mutex_lock(&fanout_mutex); - po->fanout = NULL; + f = po->fanout; + if (f) { + po->fanout = NULL; + + if (atomic_dec_and_test(&f->sk_ref)) { + list_del(&f->list); + dev_remove_pack(&f->prot_hook); + fanout_release_data(f); + kfree(f); + } - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - fanout_release_data(f); - kfree(f); + if (po->rollover) + kfree_rcu(po->rollover, rcu); } mutex_unlock(&fanout_mutex); - - if (po->rollover) - kfree_rcu(po->rollover, rcu); } static bool packet_extra_vlan_len_allowed(const struct net_device *dev, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 3c5e29ba6594..f219ff325ed4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -529,7 +529,7 @@ errout: return err; } -int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) +static int nla_memdup_cookie(struct tc_action *a, struct nlattr **tb) { a->act_cookie = kzalloc(sizeof(*a->act_cookie), GFP_KERNEL); if (!a->act_cookie) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index abe1fe13ae54..732f7cae459d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -389,6 +389,7 @@ replay: RTM_DELTFILTER, false); if (tcf_proto_destroy(tp, false)) RCU_INIT_POINTER(*back, next); + goto errout; case RTM_GETTFILTER: err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, true); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index d9c97018317d..80f688436dd7 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -148,6 +148,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_bpf_offload bpf_offload = {}; struct tc_to_netdev offload; + int err; offload.type = TC_SETUP_CLSBPF; offload.cls_bpf = &bpf_offload; @@ -159,8 +160,13 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, bpf_offload.exts_integrated = prog->exts_integrated; bpf_offload.gen_flags = prog->gen_flags; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + + if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + + return err; } static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, @@ -511,6 +517,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, return ret; } + if (!tc_in_hw(prog->gen_flags)) + prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0826c8ec3a76..9d0c99d2e9fb 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -273,6 +273,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc); + if (!err) + f->flags |= TCA_CLS_FLAGS_IN_HW; if (tc_skip_sw(f->flags)) return err; @@ -912,6 +914,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; } + if (!tc_in_hw(fnew->flags)) + fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + if (fold) { if (!tc_skip_sw(fold->flags)) rhashtable_remove_fast(&head->ht, &fold->ht_node, @@ -1229,7 +1234,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags)) goto nla_put_failure; - nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); + if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags)) + goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts)) goto nla_put_failure; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index f2141cb55562..224eb2c14346 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -56,6 +56,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, struct net_device *dev = tp->q->dev_queue->dev; struct tc_to_netdev offload; struct tc_cls_matchall_offload mall_offload = {0}; + int err; offload.type = TC_SETUP_MATCHALL; offload.cls_mall = &mall_offload; @@ -63,8 +64,12 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, offload.cls_mall->exts = &head->exts; offload.cls_mall->cookie = cookie; - return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, - &offload); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &offload); + if (!err) + head->flags |= TCA_CLS_FLAGS_IN_HW; + + return err; } static void mall_destroy_hw_filter(struct tcf_proto *tp, @@ -194,6 +199,9 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, } } + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + *arg = (unsigned long) head; rcu_assign_pointer(tp->root, new); if (head) @@ -244,6 +252,9 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_put_u32(skb, TCA_MATCHALL_CLASSID, head->res.classid)) goto nla_put_failure; + if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags)) + goto nla_put_failure; + if (tcf_exts_dump(skb, &head->exts)) goto nla_put_failure; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a6ec3e4b57ab..4dbe0c680fe6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -523,6 +523,10 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &offload); + + if (!err) + n->flags |= TCA_CLS_FLAGS_IN_HW; + if (tc_skip_sw(flags)) return err; @@ -895,6 +899,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; } + if (!tc_in_hw(new->flags)) + new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); call_rcu(&n->rcu, u32_delete_key_rcu); @@ -1014,6 +1021,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (err) goto errhw; + if (!tc_in_hw(n->flags)) + n->flags |= TCA_CLS_FLAGS_NOT_IN_HW; + ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins; ins = &pins->next, pins = rtnl_dereference(*ins)) diff --git a/net/tipc/net.c b/net/tipc/net.c index 28bf4feeb81c..ab8a2d5d1e32 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -110,6 +110,10 @@ int tipc_net_start(struct net *net, u32 addr) char addr_string[16]; tn->own_addr = addr; + + /* Ensure that the new address is visible before we reinit. */ + smp_mb(); + tipc_named_reinit(net); tipc_sk_reinit(net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 103d1fd058c0..6b09a778cc71 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -430,8 +430,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, INIT_LIST_HEAD(&tsk->cong_links); msg = &tsk->phdr; tn = net_generic(sock_net(sk), tipc_net_id); - tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, - NAMED_H_SIZE, 0); /* Finish initializing socket data structures */ sock->ops = ops; @@ -441,6 +439,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } + + /* Ensure tsk is visible before we read own_addr. */ + smp_mb(); + + tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, + NAMED_H_SIZE, 0); + msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); sk->sk_shutdown = 0; @@ -2234,24 +2239,27 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, void tipc_sk_reinit(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); - const struct bucket_table *tbl; - struct rhash_head *pos; + struct rhashtable_iter iter; struct tipc_sock *tsk; struct tipc_msg *msg; - int i; - rcu_read_lock(); - tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht); - for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(tsk, pos, tbl, i, node) { + rhashtable_walk_enter(&tn->sk_rht, &iter); + + do { + tsk = ERR_PTR(rhashtable_walk_start(&iter)); + if (tsk) + continue; + + while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); msg = &tsk->phdr; msg_set_prevnode(msg, tn->own_addr); msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - } - rcu_read_unlock(); + + rhashtable_walk_stop(&iter); + } while (tsk == ERR_PTR(-EAGAIN)); } static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index c06d3997c6e7..286ed25c1a69 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -6,6 +6,10 @@ config XFRM depends on NET select GRO_CELLS +config XFRM_OFFLOAD + bool + depends on XFRM + config XFRM_ALGO tristate select XFRM diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 3213fe8027be..46bdb4fbed0b 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -19,19 +19,18 @@ static struct kmem_cache *secpath_cachep __read_mostly; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); -static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO]; +static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; static struct gro_cells gro_cells; static struct net_device xfrm_napi_dev; -int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + if (WARN_ON(afinfo->family >= ARRAY_SIZE(xfrm_input_afinfo))) return -EAFNOSUPPORT; + spin_lock_bh(&xfrm_input_afinfo_lock); if (unlikely(xfrm_input_afinfo[afinfo->family] != NULL)) err = -EEXIST; @@ -42,14 +41,10 @@ int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_register_afinfo); -int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) +int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_input_afinfo_lock); if (likely(xfrm_input_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_input_afinfo[afinfo->family] != afinfo)) @@ -63,12 +58,13 @@ int xfrm_input_unregister_afinfo(struct xfrm_input_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_input_unregister_afinfo); -static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) +static const struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) { - struct xfrm_input_afinfo *afinfo; + const struct xfrm_input_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (WARN_ON_ONCE(family >= ARRAY_SIZE(xfrm_input_afinfo))) return NULL; + rcu_read_lock(); afinfo = rcu_dereference(xfrm_input_afinfo[family]); if (unlikely(!afinfo)) @@ -76,22 +72,17 @@ static struct xfrm_input_afinfo *xfrm_input_get_afinfo(unsigned int family) return afinfo; } -static void xfrm_input_put_afinfo(struct xfrm_input_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static int xfrm_rcv_cb(struct sk_buff *skb, unsigned int family, u8 protocol, int err) { int ret; - struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); + const struct xfrm_input_afinfo *afinfo = xfrm_input_get_afinfo(family); if (!afinfo) return -EAFNOSUPPORT; ret = afinfo->callback(skb, protocol, err); - xfrm_input_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -114,6 +105,8 @@ struct sec_path *secpath_dup(struct sec_path *src) return NULL; sp->len = 0; + sp->olen = 0; + if (src) { int i; @@ -126,6 +119,24 @@ struct sec_path *secpath_dup(struct sec_path *src) } EXPORT_SYMBOL(secpath_dup); +int secpath_set(struct sk_buff *skb) +{ + struct sec_path *sp; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + sp = secpath_dup(skb->sp); + if (!sp) + return -ENOMEM; + + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + return 0; +} +EXPORT_SYMBOL(secpath_set); + /* Fetch spi and seq from ipsec header */ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) @@ -161,6 +172,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) *seq = *(__be32 *)(skb_transport_header(skb) + offset_seq); return 0; } +EXPORT_SYMBOL(xfrm_parse_spi); int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { @@ -195,14 +207,23 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; + struct xfrm_offload *xo; + bool xfrm_gro = false; - /* A negative encap_type indicates async resumption. */ if (encap_type < 0) { - async = 1; x = xfrm_input_state(skb); - seq = XFRM_SKB_CB(skb)->seq.input.low; family = x->outer_mode->afinfo->family; - goto resume; + + /* An encap_type of -1 indicates async resumption. */ + if (encap_type == -1) { + async = 1; + seq = XFRM_SKB_CB(skb)->seq.input.low; + goto resume; + } + /* encap_type < -1 indicates a GRO call. */ + encap_type = 0; + seq = XFRM_SPI_SKB_CB(skb)->seq; + goto lock; } daddr = (xfrm_address_t *)(skb_network_header(skb) + @@ -221,18 +242,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) break; } - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - - sp = secpath_dup(skb->sp); - if (!sp) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); - goto drop; - } - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; + err = secpath_set(skb); + if (err) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); + goto drop; } seq = 0; @@ -256,6 +269,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; +lock: spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { @@ -377,7 +391,18 @@ resume: gro_cells_receive(&gro_cells, skb); return 0; } else { - return x->inner_mode->afinfo->transport_finish(skb, async); + xo = xfrm_offload(skb); + if (xo) + xfrm_gro = xo->flags & XFRM_GRO; + + err = x->inner_mode->afinfo->transport_finish(skb, async); + if (xfrm_gro) { + skb_dst_drop(skb); + gro_cells_receive(&gro_cells, skb); + return err; + } + + return err; } drop_unlock: diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 0a0f63d3cc96..5f3e87866438 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,7 +45,7 @@ struct xfrm_flo { }; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); -static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] +static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; @@ -103,11 +103,11 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl return false; } -static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) +static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; - if (unlikely(family >= NPROTO)) + if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return NULL; rcu_read_lock(); afinfo = rcu_dereference(xfrm_policy_afinfo[family]); @@ -116,18 +116,13 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) return afinfo; } -static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo) -{ - rcu_read_unlock(); -} - static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) { - struct xfrm_policy_afinfo *afinfo; + const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; afinfo = xfrm_policy_get_afinfo(family); @@ -136,7 +131,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return dst; } @@ -1431,12 +1426,12 @@ xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; err = afinfo->get_saddr(net, oif, local, remote); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1538,21 +1533,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, } -/* Check that the bundle accepts the flow and its components are - * still valid. - */ - -static inline int xfrm_get_tos(const struct flowi *fl, int family) +static int xfrm_get_tos(const struct flowi *fl, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int tos; + const struct xfrm_policy_afinfo *afinfo; + int tos = 0; - if (!afinfo) - return -EINVAL; - - tos = afinfo->get_tos(fl); + afinfo = xfrm_policy_get_afinfo(family); + tos = afinfo ? afinfo->get_tos(fl) : 0; - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return tos; } @@ -1609,7 +1598,7 @@ static const struct flow_cache_ops xfrm_bundle_fc_ops = { static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_ops *dst_ops; struct xfrm_dst *xdst; @@ -1638,7 +1627,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) } else xdst = ERR_PTR(-ENOBUFS); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return xdst; } @@ -1646,7 +1635,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(dst->ops->family); int err; @@ -1655,7 +1644,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, err = afinfo->init_path(path, dst, nfheader_len); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1663,7 +1652,7 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { - struct xfrm_policy_afinfo *afinfo = + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); int err; @@ -1672,7 +1661,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, err = afinfo->fill_dst(xdst, dev, fl); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } @@ -1705,9 +1694,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); - err = tos; - if (tos < 0) - goto put_states; dst_hold(dst); @@ -2215,7 +2201,7 @@ error: static struct dst_entry *make_blackhole(struct net *net, u16 family, struct dst_entry *dst_orig) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_entry *ret; if (!afinfo) { @@ -2224,7 +2210,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, } else { ret = afinfo->blackhole_route(net, dst_orig); } - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return ret; } @@ -2466,7 +2452,7 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); int err; if (unlikely(afinfo == NULL)) @@ -2474,7 +2460,7 @@ int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, afinfo->decode_session(skb, fl, reverse); err = security_xfrm_decode_session(skb, &fl->flowi_secid); - xfrm_policy_put_afinfo(afinfo); + rcu_read_unlock(); return err; } EXPORT_SYMBOL(__xfrm_decode_session); @@ -2742,10 +2728,11 @@ void xfrm_garbage_collect(struct net *net) } EXPORT_SYMBOL(xfrm_garbage_collect); -static void xfrm_garbage_collect_deferred(struct net *net) +void xfrm_garbage_collect_deferred(struct net *net) { flow_cache_flush_deferred(net); } +EXPORT_SYMBOL(xfrm_garbage_collect_deferred); static void xfrm_init_pmtu(struct dst_entry *dst) { @@ -2873,15 +2860,15 @@ static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) path->ops->confirm_neigh(path, daddr); } -int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) +int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) + + if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return -EAFNOSUPPORT; + spin_lock(&xfrm_policy_afinfo_lock); - if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) + if (unlikely(xfrm_policy_afinfo[family] != NULL)) err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; @@ -2901,9 +2888,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(!dst_ops->confirm_neigh)) dst_ops->confirm_neigh = xfrm_confirm_neigh; - if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = xfrm_garbage_collect_deferred; - rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo); + rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); @@ -2911,34 +2896,24 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_register_afinfo); -int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) +void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) { - int err = 0; - if (unlikely(afinfo == NULL)) - return -EINVAL; - if (unlikely(afinfo->family >= NPROTO)) - return -EAFNOSUPPORT; - spin_lock(&xfrm_policy_afinfo_lock); - if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) { - if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo)) - err = -EINVAL; - else - RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family], - NULL); + struct dst_ops *dst_ops = afinfo->dst_ops; + int i; + + for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { + if (xfrm_policy_afinfo[i] != afinfo) + continue; + RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); + break; } - spin_unlock(&xfrm_policy_afinfo_lock); - if (!err) { - struct dst_ops *dst_ops = afinfo->dst_ops; - synchronize_rcu(); + synchronize_rcu(); - dst_ops->kmem_cachep = NULL; - dst_ops->check = NULL; - dst_ops->negative_advice = NULL; - dst_ops->link_failure = NULL; - afinfo->garbage_collect = NULL; - } - return err; + dst_ops->kmem_cachep = NULL; + dst_ops->check = NULL; + dst_ops->negative_advice = NULL; + dst_ops->link_failure = NULL; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 504058631ffc..4bfcaf93fcf3 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -104,7 +104,7 @@ static int attach_filter(int cg_fd, int type, int verdict) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, type); + ret = bpf_prog_attach(prog_fd, cg_fd, type, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 6e69be37f87f..3049b1f26267 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -79,11 +79,12 @@ int main(int argc, char **argv) if (join_cgroup(FOO)) goto err; - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo"); goto err; } + printf("Attached DROP prog. This ping in cgroup /foo should fail...\n"); assert(system(PING_CMD) != 0); /* Create cgroup /foo/bar, get fd, and join it */ @@ -94,24 +95,27 @@ int main(int argc, char **argv) if (join_cgroup(BAR)) goto err; + printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } + printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { log_err("Detaching program from /foo/bar"); goto err; } + printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n" + "This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -121,8 +125,60 @@ int main(int argc, char **argv) goto err; } + printf("Attached PASS from /foo/bar and detached DROP from /foo.\n" + "This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching prog to /foo/bar"); + goto err; + } + + if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching program from /foo/bar"); + goto err; + } + + if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { + errno = 0; + log_err("Unexpected success in double detach from /foo"); + goto err; + } + + if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching non-overridable prog to /foo"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { + errno = 0; + log_err("Unexpected success attaching non-overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo/bar"); + goto err; + } + + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { + errno = 0; + log_err("Unexpected success attaching overridable prog to /foo"); + goto err; + } + + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching different non-overridable prog to /foo"); + goto err; + } + goto out; err: @@ -132,5 +188,9 @@ out: close(foo); close(bar); cleanup_cgroup_environment(); + if (!rc) + printf("PASS\n"); + else + printf("FAIL\n"); return rc; } diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index 0791b949cbe4..c3cfb23e23b5 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -75,7 +75,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index 455ef0d06e93..db036077b644 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) } ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, - BPF_CGROUP_INET_SOCK_CREATE); + BPF_CGROUP_INET_SOCK_CREATE, 0); if (ret < 0) { printf("Failed to attach prog to cgroup: '%s'\n", strerror(errno)); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e07fd5a324e6..0539a0ceef38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -123,6 +123,12 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command + * to the given target_fd cgroup the descendent cgroup will be able to + * override effective bpf program that was inherited from this cgroup + */ +#define BPF_F_ALLOW_OVERRIDE (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -178,6 +184,7 @@ union bpf_attr { __u32 target_fd; /* container object to attach to */ __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; + __u32 attach_flags; }; } __attribute__((aligned(8))); diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 50e04cc5dddd..d48b70ceb25a 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -42,21 +42,15 @@ # endif #endif -static __u64 ptr_to_u64(const void *ptr) +static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; } -static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, - unsigned int size) +static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) { -#ifdef __NR_bpf return syscall(__NR_bpf, cmd, attr, size); -#else - fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); - errno = ENOSYS; - return -1; -#endif } int bpf_create_map(enum bpf_map_type map_type, int key_size, @@ -174,7 +168,8 @@ int bpf_obj_get(const char *pathname) return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); } -int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, + unsigned int flags) { union bpf_attr attr; @@ -182,6 +177,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type) attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; + attr.attach_flags = flags; return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 88f07c15423a..9f838aa6d26e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -41,7 +41,8 @@ int bpf_map_delete_elem(int fd, const void *key); int bpf_map_get_next_key(int fd, const void *key, void *next_key); int bpf_obj_pin(int fd, const char *pathname); int bpf_obj_get(const char *pathname); -int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type); +int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, + unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9ff0db4e2d0c..933aeec46f4a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1199,7 +1199,7 @@ static int ui_init(void) BUG_ON(1); } - perf_hpp__register_sort_field(fmt); + perf_hpp__prepend_sort_field(fmt); return 0; } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 37388397b5bc..18cfcdc90356 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -521,6 +521,12 @@ void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, list_add_tail(&format->sort_list, &list->sorts); } +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) +{ + list_add(&format->sort_list, &list->sorts); +} + void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del(&format->list); @@ -560,6 +566,10 @@ void perf_hpp__setup_output_field(struct perf_hpp_list *list) perf_hpp_list__for_each_sort_list(list, fmt) { struct perf_hpp_fmt *pos; + /* skip sort-only fields ("sort_compute" in perf diff) */ + if (!fmt->entry && !fmt->color) + continue; + perf_hpp_list__for_each_format(list, pos) { if (fmt_equal(fmt, pos)) goto next; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 42922512c1c6..8b610dd9e2f6 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -437,7 +437,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; - call->ms.map = cursor_node->map; + call->ms.map = map__get(cursor_node->map); if (cursor_node->branch) { call->branch_count = 1; @@ -477,6 +477,7 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del(&call->list); + map__zput(call->ms.map); free(call); } free(new); @@ -761,6 +762,7 @@ merge_chain_branch(struct callchain_cursor *cursor, list->ms.map, list->ms.sym, false, NULL, 0, 0); list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -811,7 +813,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - node->map = map; + map__zput(node->map); + node->map = map__get(map); node->sym = sym; node->branch = branch; node->nr_loop_iter = nr_loop_iter; @@ -1142,11 +1145,13 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); + map__zput(list->ms.map); free(list); } @@ -1210,6 +1215,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; + map__get(new->ms.map); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1230,6 +1236,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del(&chain->list); + map__zput(chain->ms.map); free(chain); } return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 35c8e379530f..4f4b60f1558a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -5,6 +5,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include "event.h" +#include "map.h" #include "symbol.h" #define HELP_PAD "\t\t\t\t" @@ -184,8 +185,13 @@ int callchain_merge(struct callchain_cursor *cursor, */ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) { + struct callchain_cursor_node *node; + cursor->nr = 0; cursor->last = &cursor->first; + + for (node = cursor->first; node != NULL; node = node->next) + map__zput(node->map); } int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6770a9645609..7d1b7d33e644 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1,6 +1,7 @@ #include "util.h" #include "build-id.h" #include "hist.h" +#include "map.h" #include "session.h" #include "sort.h" #include "evlist.h" @@ -1019,6 +1020,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg) { int err, err2; + struct map *alm = NULL; + + if (al && al->map) + alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); @@ -1058,6 +1063,8 @@ out: if (!err) err = err2; + map__put(alm); + return err; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d4b6514eeef5..28c216e3d5b7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -283,6 +283,8 @@ void perf_hpp_list__column_register(struct perf_hpp_list *list, struct perf_hpp_fmt *format); void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, struct perf_hpp_fmt *format); +void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) { @@ -294,6 +296,11 @@ static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) perf_hpp_list__register_sort_field(&perf_hpp_list, format); } +static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__prepend_sort_field(&perf_hpp_list, format); +} + #define perf_hpp_list__for_each_format(_list, format) \ list_for_each_entry(format, &(_list)->fields, list) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a35f564f66a1..c7816fe60feb 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,13 +1,24 @@ -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I../../../lib +LIBDIR := ../../../lib +BPFOBJ := $(LIBDIR)/bpf/bpf.o + +CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := $(test_objs) test_kmod.sh TEST_FILES := $(test_objs) +.PHONY: all clean force + all: $(test_objs) -$(test_objs): ../../../lib/bpf/bpf.o +# force a rebuild of BPFOBJ when its dependencies are updated +force: + +$(BPFOBJ): force + $(MAKE) -C $(dir $(BPFOBJ)) + +$(test_objs): $(BPFOBJ) include ../lib.mk |