From 6ada1fc0e1c4775de0e043e1bd3ae9d065491aa5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 3 Dec 2014 19:22:48 -0500 Subject: time: settimeofday: Validate the values of tv from user An unvalidated user input is multiplied by a constant, which can result in an undefined behaviour for large values. While this is validated later, we should avoid triggering undefined behaviour. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: stable Signed-off-by: Sasha Levin [jstultz: include trivial milisecond->microsecond correction noticed by Andy] Signed-off-by: John Stultz --- include/linux/time.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 8c42cf8d2444..5989b0ead1ec 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -99,6 +99,19 @@ static inline bool timespec_valid_strict(const struct timespec *ts) return true; } +static inline bool timeval_valid(const struct timeval *tv) +{ + /* Dates before 1970 are bogus */ + if (tv->tv_sec < 0) + return false; + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + + return true; +} + extern struct timespec timespec_trunc(struct timespec t, unsigned gran); #define CURRENT_TIME (current_kernel_time()) -- cgit v1.2.3-59-g8ed1b From e61f7d1c3c07a7e51036b0796749edb00deff845 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 8 Jan 2015 10:34:27 -0500 Subject: libata: Whitelist SSDs that are known to properly return zeroes after TRIM As defined, the DRAT (Deterministic Read After Trim) and RZAT (Return Zero After Trim) flags in the ATA Command Set are unreliable in the sense that they only define what happens if the device successfully executed the DSM TRIM command. TRIM is only advisory, however, and the device is free to silently ignore all or parts of the request. In practice this renders the DRAT and RZAT flags completely useless and because the results are unpredictable we decided to disable discard in MD for 3.18 to avoid the risk of data corruption. Hardware vendors in the real world obviously need better guarantees than what the standards bodies provide. Unfortuntely those guarantees are encoded in product requirements documents rather than somewhere we can key off of them programatically. So we are compelled to disabling discard_zeroes_data for all devices unless we explicitly have data to support whitelisting them. This patch whitelists SSDs from a few of the main vendors. None of the whitelists are based on written guarantees. They are purely based on empirical evidence collected from internal and external users that have tested or qualified these drives in RAID deployments. The whitelist is only meant as a starting point and is by no means comprehensive: - All intel SSD models except for 510 - Micron M5?0/M600 - Samsung SSDs - Seagate SSDs Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 31 +++++++++++++++++++++++++++---- drivers/ata/libata-scsi.c | 10 ++++++---- include/linux/libata.h | 1 + 3 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 5c84fb5c3372..23c2ae03a7ab 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4233,10 +4233,33 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* devices that don't properly handle queued TRIM commands */ - { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT???M500SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Micron_M550*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT*M550SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Micron_M[56]*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial_CT*SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + + /* + * As defined, the DRAT (Deterministic Read After Trim) and RZAT + * (Return Zero After Trim) flags in the ATA Command Set are + * unreliable in the sense that they only define what happens if + * the device successfully executed the DSM TRIM command. TRIM + * is only advisory, however, and the device is free to silently + * ignore all or parts of the request. + * + * Whitelist drives that are known to reliably return zeroes + * after TRIM. + */ + + /* + * The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude + * that model before whitelisting all other intel SSDs. + */ + { "INTEL*SSDSC2MH*", NULL, 0, }, + + { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e364e86e84d7..6abd17a85b13 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2532,13 +2532,15 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[15] = lowest_aligned; if (ata_id_has_trim(args->id)) { - rbuf[14] |= 0x80; /* TPE */ + rbuf[14] |= 0x80; /* LBPME */ - if (ata_id_has_zero_after_trim(args->id)) - rbuf[14] |= 0x40; /* TPRZ */ + if (ata_id_has_zero_after_trim(args->id) && + dev->horkage & ATA_HORKAGE_ZERO_AFTER_TRIM) { + ata_dev_info(dev, "Enabling discard_zeroes_data\n"); + rbuf[14] |= 0x40; /* LBPRZ */ + } } } - return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 2d182413b1db..f2b440e44fd7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -422,6 +422,7 @@ enum { ATA_HORKAGE_NO_NCQ_TRIM = (1 << 19), /* don't use queued TRIM */ ATA_HORKAGE_NOLPM = (1 << 20), /* don't use LPM */ ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ + ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-59-g8ed1b From f331a859e0ee5a898c1f47596eddad4c4f02d657 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Jan 2015 18:16:04 -0600 Subject: PCI: Add flag for devices where we can't use bus reset Enable a mechanism for devices to quirk that they do not behave when doing a PCI bus reset. We require a modest level of spec compliant behavior in order to do a reset, for instance the device should come out of reset without throwing errors and PCI config space should be accessible after reset. This is too much to ask for some devices. Link: http://lkml.kernel.org/r/20140923210318.498dacbd@dualc.maya.org Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.14+ --- drivers/pci/pci.c | 40 ++++++++++++++++++++++++++++++++++++---- include/linux/pci.h | 2 ++ 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index cab05f31223f..e9d4fd861ba1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3271,7 +3271,8 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) { struct pci_dev *pdev; - if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self) + if (pci_is_root_bus(dev->bus) || dev->subordinate || + !dev->bus->self || dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) @@ -3305,7 +3306,8 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) { struct pci_dev *pdev; - if (dev->subordinate || !dev->slot) + if (dev->subordinate || !dev->slot || + dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET) return -ENOTTY; list_for_each_entry(pdev, &dev->bus->devices, bus_list) @@ -3557,6 +3559,20 @@ int pci_try_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_try_reset_function); +/* Do any devices on or below this bus prevent a bus reset? */ +static bool pci_bus_resetable(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || + (dev->subordinate && !pci_bus_resetable(dev->subordinate))) + return false; + } + + return true; +} + /* Lock devices from the top of the tree down */ static void pci_bus_lock(struct pci_bus *bus) { @@ -3607,6 +3623,22 @@ unlock: return 0; } +/* Do any devices on or below this slot prevent a bus reset? */ +static bool pci_slot_resetable(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + if (dev->dev_flags & PCI_DEV_FLAGS_NO_BUS_RESET || + (dev->subordinate && !pci_bus_resetable(dev->subordinate))) + return false; + } + + return true; +} + /* Lock devices from the top of the tree down */ static void pci_slot_lock(struct pci_slot *slot) { @@ -3728,7 +3760,7 @@ static int pci_slot_reset(struct pci_slot *slot, int probe) { int rc; - if (!slot) + if (!slot || !pci_slot_resetable(slot)) return -ENOTTY; if (!probe) @@ -3820,7 +3852,7 @@ EXPORT_SYMBOL_GPL(pci_try_reset_slot); static int pci_bus_reset(struct pci_bus *bus, int probe) { - if (!bus->self) + if (!bus->self || !pci_bus_resetable(bus)) return -ENOTTY; if (probe) diff --git a/include/linux/pci.h b/include/linux/pci.h index 360a966a97a5..44627f1df4ca 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -175,6 +175,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4), /* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */ PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5), + /* Do not use bus resets for device */ + PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), }; enum pci_irq_reroute_variant { -- cgit v1.2.3-59-g8ed1b From 8505e729a2f6eb0803ff943a15f133dd10afff3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 15 Jan 2015 16:21:49 -0600 Subject: PCI: Add pci_claim_bridge_resource() to clip window if necessary Add pci_claim_bridge_resource() to claim a PCI-PCI bridge window. This is like regular pci_claim_resource(), except that if we fail to claim the window, we check to see if we can reduce the size of the window and try again. This is for scenarios like this: pci_bus 0000:00: root bus resource [mem 0xc0000000-0xffffffff] pci 0000:00:01.0: bridge window [mem 0xbdf00000-0xddefffff 64bit pref] pci 0000:01:00.0: reg 0x10: [mem 0xc0000000-0xcfffffff pref] The 00:01.0 window is illegal: it starts before the host bridge window, so we have to assume the [0xbdf00000-0xbfffffff] region is inaccessible. We can make it legal by clipping it to [mem 0xc0000000-0xddefffff 64bit pref]. Previously we discarded the 00:01.0 window and tried to reassign that part of the hierarchy from scratch. That is a problem because Linux doesn't always assign things optimally. For example, in this case, BIOS put the 01:00.0 device in a prefetchable window below 4GB, but after 5b28541552ef, Linux puts the prefetchable window above 4GB where the 32-bit 01:00.0 device can't use it. Clipping the 00:01.0 window is less intrusive than completely reassigning things and is sufficient to let us use most of the BIOS configuration. Of course, it's possible that devices below 00:01.0 will no longer fit. If that's the case, we'll have to reassign things. But that's a separate problem. [bhelgaas: changelog, split into separate patch] Link: https://bugzilla.kernel.org/show_bug.cgi?id=85491 Reported-by: Marek Kordik Fixes: 5b28541552ef ("PCI: Restrict 64-bit prefetchable bridge windows to 64-bit resources") Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org # v3.16+ --- drivers/pci/setup-bus.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 802f56be2149..e3e17f3c0f0f 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -646,6 +646,41 @@ void pci_setup_bridge(struct pci_bus *bus) __pci_setup_bridge(bus, type); } + +int pci_claim_bridge_resource(struct pci_dev *bridge, int i) +{ + if (i < PCI_BRIDGE_RESOURCES || i > PCI_BRIDGE_RESOURCE_END) + return 0; + + if (pci_claim_resource(bridge, i) == 0) + return 0; /* claimed the window */ + + if ((bridge->class >> 8) != PCI_CLASS_BRIDGE_PCI) + return 0; + + if (!pci_bus_clip_resource(bridge, i)) + return -EINVAL; /* clipping didn't change anything */ + + switch (i - PCI_BRIDGE_RESOURCES) { + case 0: + pci_setup_bridge_io(bridge); + break; + case 1: + pci_setup_bridge_mmio(bridge); + break; + case 2: + pci_setup_bridge_mmio_pref(bridge); + break; + default: + return -EINVAL; + } + + if (pci_claim_resource(bridge, i) == 0) + return 0; /* claimed a smaller window */ + + return -EINVAL; +} + /* Check whether the bridge supports optional I/O and prefetchable memory ranges. If not, the respective base/limit registers must be read-only and read as 0. */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 44627f1df4ca..9603094ed59b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,6 +1067,7 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); void pci_bus_assign_resources(const struct pci_bus *bus); void pci_bus_size_bridges(struct pci_bus *bus); int pci_claim_resource(struct pci_dev *, int); +int pci_claim_bridge_resource(struct pci_dev *bridge, int i); void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); -- cgit v1.2.3-59-g8ed1b From ee1c244219fd652964710a6cc3e4f922e86aa492 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 11:37:14 +0100 Subject: genetlink: synchronize socket closing and family removal In addition to the problem Jeff Layton reported, I looked at the code and reproduced the same warning by subscribing and removing the genl family with a socket still open. This is a fairly tricky race which originates in the fact that generic netlink allows the family to go away while sockets are still open - unlike regular netlink which has a module refcount for every open socket so in general this cannot be triggered. Trying to resolve this issue by the obvious locking isn't possible as it will result in deadlocks between unregistration and group unbind notification (which incidentally lockdep doesn't find due to the home grown locking in the netlink table.) To really resolve this, introduce a "closing socket" reference counter (for generic netlink only, as it's the only affected family) in the core netlink code and use that in generic netlink to wait for all the sockets that are being closed at the same time as a generic netlink family is removed. This fixes the race that when a socket is closed, it will should call the unbind, but if the family is removed at the same time the unbind will not find it, leading to the warning. The real problem though is that in this case the unbind could actually find a new family that is registered to have a multicast group with the same ID, and call its mcast_unbind() leading to confusing. Also remove the warning since it would still trigger, but is now no longer a problem. This also moves the code in af_netlink.c to before unreferencing the module to avoid having the same problem in the normal non-genl case. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/genetlink.h | 4 ++++ include/net/genetlink.h | 5 ++++- net/netlink/af_netlink.c | 24 +++++++++++++++++------- net/netlink/af_netlink.h | 1 + net/netlink/genetlink.c | 16 +++++++++------- 5 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 55b685719d52..09460d6d6682 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -11,6 +11,10 @@ extern void genl_unlock(void); extern int lockdep_genl_is_held(void); #endif +/* for synchronisation between af_netlink and genetlink */ +extern atomic_t genl_sk_destructing_cnt; +extern wait_queue_head_t genl_sk_destructing_waitq; + /** * rcu_dereference_genl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 2ea2c55bdc87..6c92415311ca 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -35,7 +35,10 @@ struct genl_info; * undo operations done by pre_doit, for example release locks * @mcast_bind: a socket bound to the given multicast group (which * is given as the offset into the groups array) - * @mcast_unbind: a socket was unbound from the given multicast group + * @mcast_unbind: a socket was unbound from the given multicast group. + * Note that unbind() will not be called symmetrically if the + * generic netlink family is removed while there are still open + * sockets. * @attrbuf: buffer to store parsed attributes * @family_list: family list * @mcgrps: multicast groups used by this family (private) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84ea76ca3f1f..02fdde28dada 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -1095,6 +1096,8 @@ static void netlink_remove(struct sock *sk) __sk_del_bind_node(sk); netlink_update_listeners(sk); } + if (sk->sk_protocol == NETLINK_GENERIC) + atomic_inc(&genl_sk_destructing_cnt); netlink_table_ungrab(); } @@ -1211,6 +1214,20 @@ static int netlink_release(struct socket *sock) * will be purged. */ + /* must not acquire netlink_table_lock in any way again before unbind + * and notifying genetlink is done as otherwise it might deadlock + */ + if (nlk->netlink_unbind) { + int i; + + for (i = 0; i < nlk->ngroups; i++) + if (test_bit(i, nlk->groups)) + nlk->netlink_unbind(sock_net(sk), i + 1); + } + if (sk->sk_protocol == NETLINK_GENERIC && + atomic_dec_return(&genl_sk_destructing_cnt) == 0) + wake_up(&genl_sk_destructing_waitq); + sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1246,13 +1263,6 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } - if (nlk->netlink_unbind) { - int i; - - for (i = 0; i < nlk->ngroups; i++) - if (test_bit(i, nlk->groups)) - nlk->netlink_unbind(sock_net(sk), i + 1); - } kfree(nlk->groups); nlk->groups = NULL; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index f123a88496f8..f1c31b39aa3e 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -2,6 +2,7 @@ #define _AF_NETLINK_H #include +#include #include #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c18d3f5624b2..ee57459fc258 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -23,6 +23,9 @@ static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static DECLARE_RWSEM(cb_lock); +atomic_t genl_sk_destructing_cnt = ATOMIC_INIT(0); +DECLARE_WAIT_QUEUE_HEAD(genl_sk_destructing_waitq); + void genl_lock(void) { mutex_lock(&genl_mutex); @@ -435,15 +438,18 @@ int genl_unregister_family(struct genl_family *family) genl_lock_all(); - genl_unregister_mc_groups(family); - list_for_each_entry(rc, genl_family_chain(family->id), family_list) { if (family->id != rc->id || strcmp(rc->name, family->name)) continue; + genl_unregister_mc_groups(family); + list_del(&rc->family_list); family->n_ops = 0; - genl_unlock_all(); + up_write(&cb_lock); + wait_event(genl_sk_destructing_waitq, + atomic_read(&genl_sk_destructing_cnt) == 0); + genl_unlock(); kfree(family->attrbuf); genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0); @@ -1014,7 +1020,6 @@ static int genl_bind(struct net *net, int group) static void genl_unbind(struct net *net, int group) { int i; - bool found = false; down_read(&cb_lock); for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { @@ -1027,14 +1032,11 @@ static void genl_unbind(struct net *net, int group) if (f->mcast_unbind) f->mcast_unbind(net, fam_grp); - found = true; break; } } } up_read(&cb_lock); - - WARN_ON(!found); } static int __net_init genl_pernet_init(struct net *net) -- cgit v1.2.3-59-g8ed1b From 72dd299d5039a336493993dcc63413cf31d0e662 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 16 Jan 2015 15:13:02 -0800 Subject: libata: allow sata_sil24 to opt-out of tag ordered submission Ronny reports: https://bugzilla.kernel.org/show_bug.cgi?id=87101 "Since commit 8a4aeec8d "libata/ahci: accommodate tag ordered controllers" the access to the harddisk on the first SATA-port is failing on its first access. The access to the harddisk on the second port is working normal. When reverting the above commit, access to both harddisks is working fine again." Maintain tag ordered submission as the default, but allow sata_sil24 to continue with the old behavior. Cc: Cc: Tejun Heo Reported-by: Ronny Hegewald Signed-off-by: Dan Williams Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 5 ++++- drivers/ata/sata_sil24.c | 2 +- include/linux/libata.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 23c2ae03a7ab..d1a05f9bb91f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4771,7 +4771,10 @@ static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) return NULL; for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) { - tag = tag < max_queue ? tag : 0; + if (ap->flags & ATA_FLAG_LOWTAG) + tag = i; + else + tag = tag < max_queue ? tag : 0; /* the last tag is reserved for internal command. */ if (tag == ATA_TAG_INTERNAL) diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index d81b20ddb527..ea655949023f 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -246,7 +246,7 @@ enum { /* host flags */ SIL24_COMMON_FLAGS = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | ATA_FLAG_ACPI_SATA | - ATA_FLAG_AN | ATA_FLAG_PMP, + ATA_FLAG_AN | ATA_FLAG_PMP | ATA_FLAG_LOWTAG, SIL24_FLAG_PCIX_IRQ_WOC = (1 << 24), /* IRQ loss errata on PCI-X */ IRQ_STAT_4PORTS = 0xf, diff --git a/include/linux/libata.h b/include/linux/libata.h index f2b440e44fd7..91f705de2c0b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -231,6 +231,7 @@ enum { ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ + ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3-59-g8ed1b From 54c523127bcca986c6f9b04c7b56a949ea011899 Mon Sep 17 00:00:00 2001 From: Matt Wagantall Date: Mon, 15 Dec 2014 23:47:23 +0000 Subject: iopoll: Introduce memory-mapped IO polling macros It is sometimes necessary to poll a memory-mapped register until its value satisfies some condition. Introduce a family of convenience macros that do this. Tight-looping, sleeping, and timing out can all be accomplished using these macros. Cc: Thierry Reding Cc: Andrew Morton Cc: Robert Elliott Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Matt Wagantall Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- include/linux/iopoll.h | 144 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/iopoll.h (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h new file mode 100644 index 000000000000..1c30014ed176 --- /dev/null +++ b/include/linux/iopoll.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_IOPOLL_H +#define _LINUX_IOPOLL_H + +#include +#include +#include +#include +#include +#include + +/** + * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +/** + * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Should + * be less than ~10us since udelay is used (see + * Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (delay_us) \ + udelay(delay_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + + +#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us) + +#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#endif /* _LINUX_IOPOLL_H */ -- cgit v1.2.3-59-g8ed1b From d453cded05ee219b77815ea194dc36efa5398bca Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:04 +1030 Subject: module_arch_freeing_init(): new hook for archs before module->module_init freed. Archs have been abusing module_free() to clean up their arch-specific allocations. Since module_free() is also (ab)used by BPF and trace code, let's keep it to simple allocations, and provide a hook called before that. This means that avr32, ia64, parisc and s390 no longer need to implement their own module_free() at all. avr32 doesn't need module_finalize() either. Signed-off-by: Rusty Russell Cc: Chris Metcalf Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Tony Luck Cc: Fenghua Yu Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linux-s390@vger.kernel.org --- arch/avr32/kernel/module.c | 13 +------------ arch/ia64/kernel/module.c | 6 ++---- arch/parisc/kernel/module.c | 6 +----- arch/s390/kernel/module.c | 10 +++------- arch/tile/kernel/module.c | 2 +- include/linux/moduleloader.h | 2 ++ kernel/module.c | 7 +++++++ 7 files changed, 17 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 2c9412908024..164efa009e5b 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -19,12 +19,10 @@ #include #include -void module_free(struct module *mod, void *module_region) +void module_arch_freeing_init(struct module *mod) { vfree(mod->arch.syminfo); mod->arch.syminfo = NULL; - - vfree(module_region); } static inline int check_rela(Elf32_Rela *rela, struct module *module, @@ -291,12 +289,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, return ret; } - -int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *module) -{ - vfree(module->arch.syminfo); - module->arch.syminfo = NULL; - - return 0; -} diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 24603be24c14..29754aae5177 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -305,14 +305,12 @@ plt_target (struct plt_entry *plt) #endif /* !USE_BRL */ void -module_free (struct module *mod, void *module_region) +module_arch_freeing_init (struct module *mod) { - if (mod && mod->arch.init_unw_table && - module_region == mod->module_init) { + if (mod->arch.init_unw_table) { unw_remove_unwind_table(mod->arch.init_unw_table); mod->arch.init_unw_table = NULL; } - vfree(module_region); } /* Have we already seen one of these relocations? */ diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 50dfafc3f2c1..5822e8e200e6 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -298,14 +298,10 @@ static inline unsigned long count_stubs(const Elf_Rela *rela, unsigned long n) } #endif - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_arch_freeing_init(struct module *mod) { kfree(mod->arch.section); mod->arch.section = NULL; - - vfree(module_region); } /* Additional bytes needed in front of individual sections */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index b89b59158b95..409d152585be 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -55,14 +55,10 @@ void *module_alloc(unsigned long size) } #endif -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_arch_freeing_init(struct module *mod) { - if (mod) { - vfree(mod->arch.syminfo); - mod->arch.syminfo = NULL; - } - vfree(module_region); + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; } static void check_rela(Elf_Rela *rela, struct module *me) diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 96447c9160a0..62a597e810d6 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -83,7 +83,7 @@ void module_free(struct module *mod, void *module_region) 0, 0, 0, NULL, NULL, 0); /* - * FIXME: If module_region == mod->module_init, trim exception + * FIXME: Add module_arch_freeing_init to trim exception * table entries. */ } diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 7eeb9bbfb816..054eac853090 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -82,4 +82,6 @@ int module_finalize(const Elf_Ehdr *hdr, /* Any cleanup needed when module leaves. */ void module_arch_cleanup(struct module *mod); +/* Any cleanup before freeing mod->module_init */ +void module_arch_freeing_init(struct module *mod); #endif diff --git a/kernel/module.c b/kernel/module.c index 3965511ae133..68be0b1f9e7f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1804,6 +1804,10 @@ void __weak module_arch_cleanup(struct module *mod) { } +void __weak module_arch_freeing_init(struct module *mod) +{ +} + /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1841,6 +1845,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -2930,6 +2935,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); module_free(mod, mod->module_core); } @@ -3055,6 +3061,7 @@ static int do_init_module(struct module *mod) mod->strtab = mod->core_strtab; #endif unset_module_init_ro_nx(mod); + module_arch_freeing_init(mod); module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3-59-g8ed1b From be1f221c0445a4157d177197c236f888d3581914 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 20 Jan 2015 09:07:05 +1030 Subject: module: remove mod arg from module_free, rename module_memfree(). Nothing needs the module pointer any more, and the next patch will call it from RCU, where the module itself might no longer exist. Removing the arg is the safest approach. This just codifies the use of the module_alloc/module_free pattern which ftrace and bpf use. Signed-off-by: Rusty Russell Acked-by: Alexei Starovoitov Cc: Mikael Starvik Cc: Jesper Nilsson Cc: Ralf Baechle Cc: Ley Foon Tan Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Steven Rostedt Cc: x86@kernel.org Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: Masami Hiramatsu Cc: linux-cris-kernel@axis.com Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: nios2-dev@lists.rocketboards.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Cc: netdev@vger.kernel.org --- arch/cris/kernel/module.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/nios2/kernel/module.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 4 ++-- arch/tile/kernel/module.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- include/linux/moduleloader.h | 2 +- kernel/bpf/core.c | 2 +- kernel/kprobes.c | 2 +- kernel/module.c | 14 +++++++------- 11 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c index 51123f985eb5..af04cb6b6dc9 100644 --- a/arch/cris/kernel/module.c +++ b/arch/cris/kernel/module.c @@ -36,7 +36,7 @@ void *module_alloc(unsigned long size) } /* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_memfree(void *module_region) { kfree(module_region); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 9fd6834a2172..5d6139390bf8 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1388,7 +1388,7 @@ out: void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) - module_free(NULL, fp->bpf_func); + module_memfree(fp->bpf_func); bpf_prog_unlock_free(fp); } diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c index cc924a38f22a..e2e3f13f98d5 100644 --- a/arch/nios2/kernel/module.c +++ b/arch/nios2/kernel/module.c @@ -36,7 +36,7 @@ void *module_alloc(unsigned long size) } /* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_memfree(void *module_region) { kfree(module_region); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 1ca125b9c226..d1916b577f2c 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -699,7 +699,7 @@ out: void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) - module_free(NULL, fp->bpf_func); + module_memfree(fp->bpf_func); bpf_prog_unlock_free(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f33e7c7a3bf7..7931eeeb649a 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -776,7 +776,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (unlikely(proglen + ilen > oldproglen)) { pr_err("bpb_jit_compile fatal error\n"); kfree(addrs); - module_free(NULL, image); + module_memfree(image); return; } memcpy(image + proglen, temp, ilen); @@ -822,7 +822,7 @@ out: void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) - module_free(NULL, fp->bpf_func); + module_memfree(fp->bpf_func); bpf_prog_unlock_free(fp); } diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c index 62a597e810d6..2305084c9b93 100644 --- a/arch/tile/kernel/module.c +++ b/arch/tile/kernel/module.c @@ -74,7 +74,7 @@ error: /* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) +void module_memfree(void *module_region) { vfree(module_region); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 2142376dc8c6..8b7b0a51e742 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -674,7 +674,7 @@ static inline void *alloc_tramp(unsigned long size) } static inline void tramp_free(void *tramp) { - module_free(NULL, tramp); + module_memfree(tramp); } #else /* Trampolines can only be created if modules are supported */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 054eac853090..f7556261fe3c 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -26,7 +26,7 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section); void *module_alloc(unsigned long size); /* Free memory returned from module_alloc. */ -void module_free(struct module *mod, void *module_region); +void module_memfree(void *module_region); /* * Apply the given relocation to the (simplified) ELF. Return -error diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d6594e457a25..a64e7a207d2b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -163,7 +163,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, void bpf_jit_binary_free(struct bpf_binary_header *hdr) { - module_free(NULL, hdr); + module_memfree(hdr); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 06f58309fed2..ee619929cf90 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -127,7 +127,7 @@ static void *alloc_insn_page(void) static void free_insn_page(void *page) { - module_free(NULL, page); + module_memfree(page); } struct kprobe_insn_cache kprobe_insn_slots = { diff --git a/kernel/module.c b/kernel/module.c index 68be0b1f9e7f..1f85fd5c89d3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1795,7 +1795,7 @@ static void unset_module_core_ro_nx(struct module *mod) { } static void unset_module_init_ro_nx(struct module *mod) { } #endif -void __weak module_free(struct module *mod, void *module_region) +void __weak module_memfree(void *module_region) { vfree(module_region); } @@ -1846,7 +1846,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); + module_memfree(mod->module_init); kfree(mod->args); percpu_modfree(mod); @@ -1855,7 +1855,7 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ unset_module_core_ro_nx(mod); - module_free(mod, mod->module_core); + module_memfree(mod->module_core); #ifdef CONFIG_MPU update_protections(current->mm); @@ -2790,7 +2790,7 @@ static int move_module(struct module *mod, struct load_info *info) */ kmemleak_ignore(ptr); if (!ptr) { - module_free(mod, mod->module_core); + module_memfree(mod->module_core); return -ENOMEM; } memset(ptr, 0, mod->init_size); @@ -2936,8 +2936,8 @@ static void module_deallocate(struct module *mod, struct load_info *info) { percpu_modfree(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); - module_free(mod, mod->module_core); + module_memfree(mod->module_init); + module_memfree(mod->module_core); } int __weak module_finalize(const Elf_Ehdr *hdr, @@ -3062,7 +3062,7 @@ static int do_init_module(struct module *mod) #endif unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); - module_free(mod, mod->module_init); + module_memfree(mod->module_init); mod->module_init = NULL; mod->init_size = 0; mod->init_ro_size = 0; -- cgit v1.2.3-59-g8ed1b From d5db139ab3764640e0882a1746e7b9fdee33fd87 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 22 Jan 2015 11:13:14 +1030 Subject: module: make module_refcount() a signed integer. James Bottomley points out that it will be -1 during unload. It's only used for diagnostics, so let's not hide that as it could be a clue as to what's gone wrong. Cc: Jason Wessel Acked-and-documention-added-by: James Bottomley Reviewed-by: Masami Hiramatsu Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- kernel/debug/kdb/kdb_main.c | 2 +- kernel/module.c | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index ebfb0e153c6a..b653d7c0a05a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -444,7 +444,7 @@ extern void __module_put_and_exit(struct module *mod, long code) #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code) #ifdef CONFIG_MODULE_UNLOAD -unsigned long module_refcount(struct module *mod); +int module_refcount(struct module *mod); void __symbol_put(const char *symbol); #define symbol_put(x) __symbol_put(VMLINUX_SYMBOL_STR(x)) void symbol_put_addr(void *addr); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 379650b984f8..2934889f2cce 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1979,7 +1979,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf("%-20s%8u 0x%p ", mod->name, mod->core_size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD - kdb_printf("%4ld ", module_refcount(mod)); + kdb_printf("%4d ", module_refcount(mod)); #endif if (mod->state == MODULE_STATE_GOING) kdb_printf(" (Unloading)"); diff --git a/kernel/module.c b/kernel/module.c index ed4ec9c30bd2..d856e96a3cce 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -772,9 +772,18 @@ static int try_stop_module(struct module *mod, int flags, int *forced) return 0; } -unsigned long module_refcount(struct module *mod) +/** + * module_refcount - return the refcount or -1 if unloading + * + * @mod: the module we're checking + * + * Returns: + * -1 if the module is in the process of unloading + * otherwise the number of references in the kernel to the module + */ +int module_refcount(struct module *mod) { - return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE; + return atomic_read(&mod->refcnt) - MODULE_REF_BASE; } EXPORT_SYMBOL(module_refcount); @@ -856,7 +865,7 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod) struct module_use *use; int printed_something = 0; - seq_printf(m, " %lu ", module_refcount(mod)); + seq_printf(m, " %i ", module_refcount(mod)); /* * Always include a trailing , so userspace can differentiate @@ -908,7 +917,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr); static ssize_t show_refcnt(struct module_attribute *mattr, struct module_kobject *mk, char *buffer) { - return sprintf(buffer, "%lu\n", module_refcount(mk->mod)); + return sprintf(buffer, "%i\n", module_refcount(mk->mod)); } static struct module_attribute modinfo_refcnt = -- cgit v1.2.3-59-g8ed1b