From a118084432d642eeccb961c7c8cc61525a941fcb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 20 May 2016 22:13:45 +0200 Subject: vfs: add d_real_inode() helper Needed by the following fix. Signed-off-by: Miklos Szeredi Cc: --- include/linux/dcache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7e9422cb5989..ad5d582f9b14 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -576,5 +576,17 @@ static inline struct inode *vfs_select_inode(struct dentry *dentry, return inode; } +/** + * d_real_inode - Return the real inode + * @dentry: The dentry to query + * + * If dentry is on an union/overlay, then return the underlying, real inode. + * Otherwise return d_inode(). + */ +static inline struct inode *d_real_inode(struct dentry *dentry) +{ + return d_backing_inode(d_real(dentry)); +} + #endif /* __LINUX_DCACHE_H */ -- cgit v1.3-6-gb490 From 65925b65ed98ffdb277cf5ea1af45731dac0b30b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 21 May 2016 20:43:16 +0200 Subject: iio: st_sensors: switch to a threaded interrupt commit 98ad8b41f58dff6b30713d7f09ae3834b8df7ded ("iio: st_sensors: verify interrupt event to status") caused a regression when reading ST sensors from a HRTimer trigger rather than the intrinsic interrupts: the HRTimer may trigger faster than the sensor provides new values, and as the check against new values available as a cause of the interrupt trigger was done in the poll function, this would bail out of the HRTimer interrupt with IRQ_NONE. So clearly we need to only check the new values available from the proper interrupt handler and not from the poll function, which should rather just read the raw values from the registers, put them into the buffer and be happy. To achieve this: switch the ST Sensors over to using a true threaded interrupt handler. In the interrupt thread, check if new values are available, else yield to the (potential) next device on the same interrupt line to check the registers. If the interrupt was ours, proceed to poll the values. Instead of relying on iio_trigger_generic_data_rdy_poll() as a top half to wake up the thread that polls the sensor for new data, have the thread call iio_trigger_poll_chained() after determining that is is the proper source of the interrupt. This is modelled on drivers/iio/accel/mma8452.c which is already using a properly threaded interrupt handler. In order to get the same precision in timestamps as previously, where samples would be timestamped in the poll function pf->timestamp when calling iio_trigger_generic_data_rdy_poll() we introduce a local timestamp in the sensor data, set it in the top half (fastpath) of the interrupt handler and provide that to the core when calling iio_push_to_buffers_with_timestamp(). Additionally: if the active scanmask is not set for the sensor no IRQs should be enabled and we need to bail out with IRQ_NONE. This can happen if spurious IRQs fire when installing the threaded interrupt handler. Tested with hard interrupt triggers on LIS331DL, then also tested with hrtimers on the same sensor by creating a 75Hz HRTimer and using it to poll the sensor. Signed-off-by: Linus Walleij Cc: Giuseppe Barba Cc: Denis Ciocca Reported-by: Crestez Dan Leonard Tested-by: Crestez Dan Leonard Tested-by: Jonathan Cameron Fixes: 97865fe41322 ("iio: st_sensors: verify interrupt event to status") Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_buffer.c | 2 +- drivers/iio/accel/st_accel_core.c | 1 + drivers/iio/common/st_sensors/st_sensors_buffer.c | 25 ++---- drivers/iio/common/st_sensors/st_sensors_core.c | 3 + drivers/iio/common/st_sensors/st_sensors_trigger.c | 88 +++++++++++++++++++++- drivers/iio/gyro/st_gyro_buffer.c | 2 +- drivers/iio/gyro/st_gyro_core.c | 1 + drivers/iio/magnetometer/st_magn_buffer.c | 2 +- drivers/iio/magnetometer/st_magn_core.c | 1 + drivers/iio/pressure/st_pressure_buffer.c | 2 +- drivers/iio/pressure/st_pressure_core.c | 1 + include/linux/iio/common/st_sensors.h | 9 ++- 12 files changed, 111 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c index a1e642ee13d6..7fddc137e91e 100644 --- a/drivers/iio/accel/st_accel_buffer.c +++ b/drivers/iio/accel/st_accel_buffer.c @@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_accel_buffer_setup_ops = { int st_accel_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_accel_buffer_setup_ops); } diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index dc73f2d85e6d..4d95bfc4786c 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -741,6 +741,7 @@ static const struct iio_info accel_info = { static const struct iio_trigger_ops st_accel_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_ACCEL_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_ACCEL_TRIGGER_OPS (&st_accel_trigger_ops) #else diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c index c55898543a47..f1693dbebb8a 100644 --- a/drivers/iio/common/st_sensors/st_sensors_buffer.c +++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c @@ -57,31 +57,20 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p) struct iio_poll_func *pf = p; struct iio_dev *indio_dev = pf->indio_dev; struct st_sensor_data *sdata = iio_priv(indio_dev); + s64 timestamp; - /* If we have a status register, check if this IRQ came from us */ - if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { - u8 status; - - len = sdata->tf->read_byte(&sdata->tb, sdata->dev, - sdata->sensor_settings->drdy_irq.addr_stat_drdy, - &status); - if (len < 0) - dev_err(sdata->dev, "could not read channel status\n"); - - /* - * If this was not caused by any channels on this sensor, - * return IRQ_NONE - */ - if (!(status & (u8)indio_dev->active_scan_mask[0])) - return IRQ_NONE; - } + /* If we do timetamping here, do it before reading the values */ + if (sdata->hw_irq_trigger) + timestamp = sdata->hw_timestamp; + else + timestamp = iio_get_time_ns(); len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data); if (len < 0) goto st_sensors_get_buffer_element_error; iio_push_to_buffers_with_timestamp(indio_dev, sdata->buffer_data, - pf->timestamp); + timestamp); st_sensors_get_buffer_element_error: iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index dffe00692169..928ee68fcc5f 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -424,6 +424,9 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable) else drdy_mask = sdata->sensor_settings->drdy_irq.mask_int2; + /* Flag to the poll function that the hardware trigger is in use */ + sdata->hw_irq_trigger = enable; + /* Enable/Disable the interrupt generator for data ready. */ err = st_sensors_write_data_with_mask(indio_dev, sdata->sensor_settings->drdy_irq.addr, diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index da72279fcf99..1f59bcc0f143 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -17,6 +17,73 @@ #include #include "st_sensors_core.h" +/** + * st_sensors_irq_handler() - top half of the IRQ-based triggers + * @irq: irq number + * @p: private handler data + */ +irqreturn_t st_sensors_irq_handler(int irq, void *p) +{ + struct iio_trigger *trig = p; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct st_sensor_data *sdata = iio_priv(indio_dev); + + /* Get the time stamp as close in time as possible */ + sdata->hw_timestamp = iio_get_time_ns(); + return IRQ_WAKE_THREAD; +} + +/** + * st_sensors_irq_thread() - bottom half of the IRQ-based triggers + * @irq: irq number + * @p: private handler data + */ +irqreturn_t st_sensors_irq_thread(int irq, void *p) +{ + struct iio_trigger *trig = p; + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct st_sensor_data *sdata = iio_priv(indio_dev); + int ret; + + /* + * If this trigger is backed by a hardware interrupt and we have a + * status register, check if this IRQ came from us + */ + if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) { + u8 status; + + ret = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sdata->sensor_settings->drdy_irq.addr_stat_drdy, + &status); + if (ret < 0) { + dev_err(sdata->dev, "could not read channel status\n"); + goto out_poll; + } + /* + * the lower bits of .active_scan_mask[0] is directly mapped + * to the channels on the sensor: either bit 0 for + * one-dimensional sensors, or e.g. x,y,z for accelerometers, + * gyroscopes or magnetometers. No sensor use more than 3 + * channels, so cut the other status bits here. + */ + status &= 0x07; + + /* + * If this was not caused by any channels on this sensor, + * return IRQ_NONE + */ + if (!indio_dev->active_scan_mask) + return IRQ_NONE; + if (!(status & (u8)indio_dev->active_scan_mask[0])) + return IRQ_NONE; + } + +out_poll: + /* It's our IRQ: proceed to handle the register polling */ + iio_trigger_poll_chained(p); + return IRQ_HANDLED; +} + int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops) { @@ -77,9 +144,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, sdata->sensor_settings->drdy_irq.addr_stat_drdy) irq_trig |= IRQF_SHARED; - err = request_threaded_irq(irq, - iio_trigger_generic_data_rdy_poll, - NULL, + /* Let's create an interrupt thread masking the hard IRQ here */ + irq_trig |= IRQF_ONESHOT; + + err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev), + st_sensors_irq_handler, + st_sensors_irq_thread, irq_trig, sdata->trig->name, sdata->trig); @@ -119,6 +189,18 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev) } EXPORT_SYMBOL(st_sensors_deallocate_trigger); +int st_sensors_validate_device(struct iio_trigger *trig, + struct iio_dev *indio_dev) +{ + struct iio_dev *indio = iio_trigger_get_drvdata(trig); + + if (indio != indio_dev) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL(st_sensors_validate_device); + MODULE_AUTHOR("Denis Ciocca "); MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c index d67b17b6a7aa..a5377044e42f 100644 --- a/drivers/iio/gyro/st_gyro_buffer.c +++ b/drivers/iio/gyro/st_gyro_buffer.c @@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_gyro_buffer_setup_ops = { int st_gyro_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_gyro_buffer_setup_ops); } diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index 52a3c87c375c..a8012955a1f6 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -409,6 +409,7 @@ static const struct iio_info gyro_info = { static const struct iio_trigger_ops st_gyro_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_GYRO_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_GYRO_TRIGGER_OPS (&st_gyro_trigger_ops) #else diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c index ecd3bd0a9769..0a9e8fadfa9d 100644 --- a/drivers/iio/magnetometer/st_magn_buffer.c +++ b/drivers/iio/magnetometer/st_magn_buffer.c @@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = { int st_magn_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_magn_buffer_setup_ops); } diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 62036d2a9956..8250fc322c56 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -572,6 +572,7 @@ static const struct iio_info magn_info = { static const struct iio_trigger_ops st_magn_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_MAGN_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_MAGN_TRIGGER_OPS (&st_magn_trigger_ops) #else diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c index 2ff53f222352..99468d0a64e7 100644 --- a/drivers/iio/pressure/st_pressure_buffer.c +++ b/drivers/iio/pressure/st_pressure_buffer.c @@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_press_buffer_setup_ops = { int st_press_allocate_ring(struct iio_dev *indio_dev) { - return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time, + return iio_triggered_buffer_setup(indio_dev, NULL, &st_sensors_trigger_handler, &st_press_buffer_setup_ops); } diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 257b58ac6779..92a118c3c4ac 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -445,6 +445,7 @@ static const struct iio_info press_info = { static const struct iio_trigger_ops st_press_trigger_ops = { .owner = THIS_MODULE, .set_trigger_state = ST_PRESS_TRIGGER_SET_STATE, + .validate_device = st_sensors_validate_device, }; #define ST_PRESS_TRIGGER_OPS (&st_press_trigger_ops) #else diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index d029ffac0d69..99403b19092f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -223,6 +223,8 @@ struct st_sensor_settings { * @get_irq_data_ready: Function to get the IRQ used for data ready signal. * @tf: Transfer function structure used by I/O operations. * @tb: Transfer buffers and mutex used by I/O operations. + * @hw_irq_trigger: if we're using the hardware interrupt on the sensor. + * @hw_timestamp: Latest timestamp from the interrupt handler, when in use. */ struct st_sensor_data { struct device *dev; @@ -247,6 +249,9 @@ struct st_sensor_data { const struct st_sensor_transfer_function *tf; struct st_sensor_transfer_buffer tb; + + bool hw_irq_trigger; + s64 hw_timestamp; }; #ifdef CONFIG_IIO_BUFFER @@ -260,7 +265,8 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops); void st_sensors_deallocate_trigger(struct iio_dev *indio_dev); - +int st_sensors_validate_device(struct iio_trigger *trig, + struct iio_dev *indio_dev); #else static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev, const struct iio_trigger_ops *trigger_ops) @@ -271,6 +277,7 @@ static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev) { return; } +#define st_sensors_validate_device NULL #endif int st_sensors_init_sensor(struct iio_dev *indio_dev, -- cgit v1.3-6-gb490 From b7ec35b304b64af2830027350cc99d31e6e537c2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 28 Apr 2016 16:07:25 +0200 Subject: libceph: change ceph_osdmap_flag() to take osdc For the benefit of every single caller, take osdc instead of map. Also, now that osdc->osdmap can't ever be NULL, drop the check. Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 8 ++++---- include/linux/ceph/osd_client.h | 5 +++++ include/linux/ceph/osdmap.h | 5 ----- net/ceph/osd_client.c | 39 +++++++++++++++++++-------------------- 4 files changed, 28 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a888df6f2d71..8eeb9f579db5 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1349,7 +1349,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) } retry_snap: - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL)) { err = -ENOSPC; goto out; } @@ -1440,7 +1440,7 @@ retry_snap: ceph_put_cap_refs(ci, got); if (written >= 0) { - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL)) + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_NEARFULL)) iocb->ki_flags |= IOCB_DSYNC; written = generic_write_sync(iocb, written); @@ -1672,8 +1672,8 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) && - !(mode & FALLOC_FL_PUNCH_HOLE)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) && + !(mode & FALLOC_FL_PUNCH_HOLE)) { ret = -ENOSPC; goto unlock; } diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 19b14862d3e0..1b3b6e155392 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -279,6 +279,11 @@ struct ceph_osd_client { struct workqueue_struct *notify_wq; }; +static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag) +{ + return osdc->osdmap->flags & flag; +} + extern int ceph_osdc_setup(void); extern void ceph_osdc_cleanup(void); diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index ddc426b22d81..9ccf4dbe55f8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -189,11 +189,6 @@ static inline bool ceph_osd_is_down(struct ceph_osdmap *map, int osd) return !ceph_osd_is_up(map, osd); } -static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) -{ - return map && (map->flags & flag); -} - extern char *ceph_osdmap_state_str(char *str, int len, int state); extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0160d7d09a1e..79c3bad87e62 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1276,9 +1276,9 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc, const struct ceph_osd_request_target *t, struct ceph_pg_pool_info *pi) { - bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + bool pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + bool pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || __pool_full(pi); WARN_ON(pi->id != t->base_oloc.pool); @@ -1303,8 +1303,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, bool force_resend = false; bool need_check_tiering = false; bool need_resend = false; - bool sort_bitwise = ceph_osdmap_flag(osdc->osdmap, - CEPH_OSDMAP_SORTBITWISE); + bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); enum calc_target_result ct_res; int ret; @@ -1590,9 +1589,9 @@ static void maybe_request_map(struct ceph_osd_client *osdc) verify_osdc_locked(osdc); WARN_ON(!osdc->osdmap->epoch); - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("%s osdc %p continuous\n", __func__, osdc); continuous = true; } else { @@ -1629,19 +1628,19 @@ again: } if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR)) { + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR)) { dout("req %p pausewr\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_READ) && - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD)) { + ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("req %p pauserd\n", req); req->r_t.paused = true; maybe_request_map(osdc); } else if ((req->r_flags & CEPH_OSD_FLAG_WRITE) && !(req->r_flags & (CEPH_OSD_FLAG_FULL_TRY | CEPH_OSD_FLAG_FULL_FORCE)) && - (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || pool_full(osdc, req->r_t.base_oloc.pool))) { dout("req %p full/pool_full\n", req); pr_warn_ratelimited("FULL or reached pool quota\n"); @@ -2280,7 +2279,7 @@ static void send_linger_ping(struct ceph_osd_linger_request *lreq) struct ceph_osd_request *req = lreq->ping_req; struct ceph_osd_req_op *op = &req->r_ops[0]; - if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD)) { + if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD)) { dout("%s PAUSERD\n", __func__); return; } @@ -3050,7 +3049,7 @@ static int handle_one_map(struct ceph_osd_client *osdc, bool skipped_map = false; bool was_full; - was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + was_full = ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); set_pool_was_full(osdc); if (incremental) @@ -3088,7 +3087,7 @@ static int handle_one_map(struct ceph_osd_client *osdc, osdc->osdmap = newmap; } - was_full &= !ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL); + was_full &= !ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL); scan_requests(&osdc->homeless_osd, skipped_map, was_full, true, need_resend, need_resend_linger); @@ -3174,9 +3173,9 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) if (ceph_check_fsid(osdc->client, &fsid) < 0) goto bad; - was_pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - was_pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + was_pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + was_pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); /* incremental maps */ @@ -3238,9 +3237,9 @@ done: * we find out when we are no longer full and stop returning * ENOSPC. */ - pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD); - pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) || - ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) || + pauserd = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSERD); + pausewr = ceph_osdmap_flag(osdc, CEPH_OSDMAP_PAUSEWR) || + ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) || have_pool_full(osdc); if (was_pauserd || was_pausewr || pauserd || pausewr) maybe_request_map(osdc); -- cgit v1.3-6-gb490 From f1bddbb3de60872acc2446eee97dbeb0a6d57acb Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Thu, 5 May 2016 10:46:05 +0200 Subject: usb: gadget: Fix binding to UDC via configfs interface By default user could store only valid UDC name in configfs UDC attr by doing: echo $UDC_NAME > UDC Commit (855ed04 "usb: gadget: udc-core: independent registration of gadgets and gadget drivers") broke this behavior and allowed to store any arbitrary string in UDC file and udc core was waiting for such controller to appear. echo "any arbitrary string here" > UDC This commit fix this by adding a flag which prevents configfs gadget from being added to list of pending drivers if UDC with given name has not been found. Signed-off-by: Krzysztof Opasiak Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 1 + drivers/usb/gadget/udc/udc-core.c | 12 ++++++++---- include/linux/usb/gadget.h | 3 +++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index b6f60ca8a035..70cf3477f951 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1401,6 +1401,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .owner = THIS_MODULE, .name = "configfs-gadget", }, + .match_existing_only = 1, }; static struct config_group *gadgets_make( diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index 6e8300d6a737..e1b2dcebdc2e 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -603,11 +603,15 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver) } } - list_add_tail(&driver->pending, &gadget_driver_pending_list); - pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n", - driver->function); + if (!driver->match_existing_only) { + list_add_tail(&driver->pending, &gadget_driver_pending_list); + pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n", + driver->function); + ret = 0; + } + mutex_unlock(&udc_lock); - return 0; + return ret; found: ret = udc_bind_to_driver(udc, driver); mutex_unlock(&udc_lock); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 457651bf45b0..fefe8b06a63d 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1034,6 +1034,8 @@ static inline int usb_gadget_activate(struct usb_gadget *gadget) * @udc_name: A name of UDC this driver should be bound to. If udc_name is NULL, * this driver will be bound to any available UDC. * @pending: UDC core private data used for deferred probe of this driver. + * @match_existing_only: If udc is not found, return an error and don't add this + * gadget driver to list of pending driver * * Devices are disabled till a gadget driver successfully bind()s, which * means the driver will handle setup() requests needed to enumerate (and @@ -1097,6 +1099,7 @@ struct usb_gadget_driver { char *udc_name; struct list_head pending; + unsigned match_existing_only:1; }; -- cgit v1.3-6-gb490 From e2082e3ab801b989d8d5337b2ecbfc61d09781cb Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:26:50 -0400 Subject: dma-buf: headerdoc fixes Apparently nobody noticed that dma-buf.h wasn't actually pulled into docbook build. And as a result the headerdoc comments bitrot a bit. Add missing params/fields. Signed-off-by: Rob Clark Signed-off-by: Sumit Semwal --- include/linux/dma-buf.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3fe90d494edb..4551c6f2a6c4 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -112,19 +112,24 @@ struct dma_buf_ops { * @file: file pointer used for sharing buffers across, and for refcounting. * @attachments: list of dma_buf_attachment that denotes all devices attached. * @ops: dma_buf_ops associated with this buffer object. + * @lock: used internally to serialize list manipulation, attach/detach and vmap/unmap + * @vmapping_counter: used internally to refcnt the vmaps + * @vmap_ptr: the current vmap ptr if vmapping_counter > 0 * @exp_name: name of the exporter; useful for debugging. * @owner: pointer to exporter module; used for refcounting when exporter is a * kernel module. * @list_node: node for dma_buf accounting and debugging. * @priv: exporter specific private data for this buffer object. * @resv: reservation object linked to this dma-buf + * @poll: for userspace poll support + * @cb_excl: for userspace poll support + * @cb_shared: for userspace poll support */ struct dma_buf { size_t size; struct file *file; struct list_head attachments; const struct dma_buf_ops *ops; - /* mutex to serialize list manipulation, attach/detach and vmap/unmap */ struct mutex lock; unsigned vmapping_counter; void *vmap_ptr; @@ -188,9 +193,11 @@ struct dma_buf_export_info { /** * helper macro for exporters; zeros and fills in most common values + * + * @name: export-info name */ -#define DEFINE_DMA_BUF_EXPORT_INFO(a) \ - struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME, \ +#define DEFINE_DMA_BUF_EXPORT_INFO(name) \ + struct dma_buf_export_info name = { .exp_name = KBUILD_MODNAME, \ .owner = THIS_MODULE } /** -- cgit v1.3-6-gb490 From dad6c3945fd25384c2b92306a90ba033e1130428 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 31 Mar 2016 16:26:51 -0400 Subject: reservation: add headerdoc comments Signed-off-by: Rob Clark Signed-off-by: Sumit Semwal --- drivers/dma-buf/reservation.c | 72 ++++++++++++++++++++++++++++++++++++++++--- include/linux/reservation.h | 53 +++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index c0bd5722c997..9566a62ad8e3 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -35,6 +35,17 @@ #include #include +/** + * DOC: Reservation Object Overview + * + * The reservation object provides a mechanism to manage shared and + * exclusive fences associated with a buffer. A reservation object + * can have attached one exclusive fence (normally associated with + * write operations) or N shared fences (read operations). The RCU + * mechanism is used to protect read access to fences from locked + * write-side updates. + */ + DEFINE_WW_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); @@ -43,9 +54,17 @@ EXPORT_SYMBOL(reservation_seqcount_class); const char reservation_seqcount_string[] = "reservation_seqcount"; EXPORT_SYMBOL(reservation_seqcount_string); -/* - * Reserve space to add a shared fence to a reservation_object, - * must be called with obj->lock held. + +/** + * reservation_object_reserve_shared - Reserve space to add a shared + * fence to a reservation_object. + * @obj: reservation object + * + * Should be called before reservation_object_add_shared_fence(). Must + * be called with obj->lock held. + * + * RETURNS + * Zero for success, or -errno */ int reservation_object_reserve_shared(struct reservation_object *obj) { @@ -180,7 +199,11 @@ done: fence_put(old_fence); } -/* +/** + * reservation_object_add_shared_fence - Add a fence to a shared slot + * @obj: the reservation object + * @fence: the shared fence to add + * * Add a fence to a shared slot, obj->lock must be held, and * reservation_object_reserve_shared_fence has been called. */ @@ -200,6 +223,13 @@ void reservation_object_add_shared_fence(struct reservation_object *obj, } EXPORT_SYMBOL(reservation_object_add_shared_fence); +/** + * reservation_object_add_excl_fence - Add an exclusive fence. + * @obj: the reservation object + * @fence: the shared fence to add + * + * Add a fence to the exclusive slot. The obj->lock must be held. + */ void reservation_object_add_excl_fence(struct reservation_object *obj, struct fence *fence) { @@ -233,6 +263,18 @@ void reservation_object_add_excl_fence(struct reservation_object *obj, } EXPORT_SYMBOL(reservation_object_add_excl_fence); +/** + * reservation_object_get_fences_rcu - Get an object's shared and exclusive + * fences without update side lock held + * @obj: the reservation object + * @pfence_excl: the returned exclusive fence (or NULL) + * @pshared_count: the number of shared fences returned + * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * the required size, and must be freed by caller) + * + * RETURNS + * Zero or -errno + */ int reservation_object_get_fences_rcu(struct reservation_object *obj, struct fence **pfence_excl, unsigned *pshared_count, @@ -319,6 +361,18 @@ unlock: } EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu); +/** + * reservation_object_wait_timeout_rcu - Wait on reservation's objects + * shared and/or exclusive fences. + * @obj: the reservation object + * @wait_all: if true, wait on all fences, else wait on just exclusive fence + * @intr: if true, do interruptible wait + * @timeout: timeout value in jiffies or zero to return immediately + * + * RETURNS + * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or + * greater than zer on success. + */ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, bool wait_all, bool intr, unsigned long timeout) @@ -416,6 +470,16 @@ reservation_object_test_signaled_single(struct fence *passed_fence) return ret; } +/** + * reservation_object_test_signaled_rcu - Test if a reservation object's + * fences have been signaled. + * @obj: the reservation object + * @test_all: if true, test all fences, otherwise only test the exclusive + * fence + * + * RETURNS + * true if all fences signaled, else false + */ bool reservation_object_test_signaled_rcu(struct reservation_object *obj, bool test_all) { diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 49d057655d62..b0f305e77b7f 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -49,12 +49,27 @@ extern struct ww_class reservation_ww_class; extern struct lock_class_key reservation_seqcount_class; extern const char reservation_seqcount_string[]; +/** + * struct reservation_object_list - a list of shared fences + * @rcu: for internal use + * @shared_count: table of shared fences + * @shared_max: for growing shared fence table + * @shared: shared fence table + */ struct reservation_object_list { struct rcu_head rcu; u32 shared_count, shared_max; struct fence __rcu *shared[]; }; +/** + * struct reservation_object - a reservation object manages fences for a buffer + * @lock: update side lock + * @seq: sequence count for managing RCU read-side synchronization + * @fence_excl: the exclusive fence, if there is one currently + * @fence: list of current shared fences + * @staged: staged copy of shared fences for RCU updates + */ struct reservation_object { struct ww_mutex lock; seqcount_t seq; @@ -68,6 +83,10 @@ struct reservation_object { #define reservation_object_assert_held(obj) \ lockdep_assert_held(&(obj)->lock.base) +/** + * reservation_object_init - initialize a reservation object + * @obj: the reservation object + */ static inline void reservation_object_init(struct reservation_object *obj) { @@ -79,6 +98,10 @@ reservation_object_init(struct reservation_object *obj) obj->staged = NULL; } +/** + * reservation_object_fini - destroys a reservation object + * @obj: the reservation object + */ static inline void reservation_object_fini(struct reservation_object *obj) { @@ -106,6 +129,14 @@ reservation_object_fini(struct reservation_object *obj) ww_mutex_destroy(&obj->lock); } +/** + * reservation_object_get_list - get the reservation object's + * shared fence list, with update-side lock held + * @obj: the reservation object + * + * Returns the shared fence list. Does NOT take references to + * the fence. The obj->lock must be held. + */ static inline struct reservation_object_list * reservation_object_get_list(struct reservation_object *obj) { @@ -113,6 +144,17 @@ reservation_object_get_list(struct reservation_object *obj) reservation_object_held(obj)); } +/** + * reservation_object_get_excl - get the reservation object's + * exclusive fence, with update-side lock held + * @obj: the reservation object + * + * Returns the exclusive fence (if any). Does NOT take a + * reference. The obj->lock must be held. + * + * RETURNS + * The exclusive fence or NULL + */ static inline struct fence * reservation_object_get_excl(struct reservation_object *obj) { @@ -120,6 +162,17 @@ reservation_object_get_excl(struct reservation_object *obj) reservation_object_held(obj)); } +/** + * reservation_object_get_excl_rcu - get the reservation object's + * exclusive fence, without lock held. + * @obj: the reservation object + * + * If there is an exclusive fence, this atomically increments it's + * reference count and returns it. + * + * RETURNS + * The exclusive fence or NULL if none + */ static inline struct fence * reservation_object_get_excl_rcu(struct reservation_object *obj) { -- cgit v1.3-6-gb490 From 4320c2a22df12f954edd4997f71ca3a4216312b2 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Mon, 11 Apr 2016 12:48:55 +0100 Subject: fence: add missing descriptions for fence The members child_list and active_list were added to the fence struct without descriptions for the Documentation. Adding these. Fixes: b55b54b5db33 ("staging/android: remove struct sync_pt") Signed-off-by: Luis de Bethencourt Reviewed-by: Javier Martinez Canillas Reviewed-by: Gustavo Padovan Signed-off-by: Sumit Semwal --- include/linux/fence.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 2b17698b60b8..2056e9fd0138 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -49,6 +49,8 @@ struct fence_cb; * @timestamp: Timestamp when the fence was signaled. * @status: Optional, only valid if < 0, must be set before calling * fence_signal, indicates that the fence has completed with an error. + * @child_list: list of children fences + * @active_list: list of active fences * * the flags member must be manipulated and read using the appropriate * atomic ops (bit_*), so taking the spinlock will not be needed most -- cgit v1.3-6-gb490 From 40eb90e9ccc3f96f937ea1db79d0f9cb61553ed5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 29 May 2016 17:42:13 +0800 Subject: sctp: sctp_diag should dump sctp socket type Now we cannot distinguish that one sk is a udp or sctp style when we use ss to dump sctp_info. it's necessary to dump it as well. For sctp_diag, ss support is not officially available, thus there are no official users of this yet, so we can add this field in the middle of sctp_info without breaking user API. v1->v2: - move 'sctpi_s_type' field to the end of struct sctp_info, so that it won't cause incompatibility with applications already built. - add __reserved3 in sctp_info to make sure sctp_info is 8-byte alignment. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 ++ net/sctp/socket.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index dacb5e711994..de1f64318fc4 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -765,6 +765,8 @@ struct sctp_info { __u8 sctpi_s_disable_fragments; __u8 sctpi_s_v4mapped; __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; }; struct sctp_infox { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 777d0324594a..67154b848aa9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4220,6 +4220,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_s_disable_fragments = sp->disable_fragments; info->sctpi_s_v4mapped = sp->v4mapped; info->sctpi_s_frag_interleave = sp->frag_interleave; + info->sctpi_s_type = sp->type; return 0; } -- cgit v1.3-6-gb490 From 480ce08a70e4179f34808a3bdbfe6627f624cf54 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 20 May 2016 18:32:31 +0800 Subject: FS-Cache: make check_consistency callback return int __fscache_check_consistency() calls check_consistency() callback and return the callback's return value. But the return type of check_consistency() is bool. So __fscache_check_consistency() return 1 if the cache is inconsistent. This is inconsistent with the document. Signed-off-by: Yan, Zheng Acked-by: David Howells --- fs/cachefiles/interface.c | 2 +- include/linux/fscache-cache.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 861d611b8c05..ce5f345d70f5 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -380,7 +380,7 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache) * check if the backing cache is updated to FS-Cache * - called by FS-Cache when evaluates if need to invalidate the cache */ -static bool cachefiles_check_consistency(struct fscache_operation *op) +static int cachefiles_check_consistency(struct fscache_operation *op) { struct cachefiles_object *object; struct cachefiles_cache *cache; diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index 604e1526cd00..13ba552e6c09 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -241,7 +241,7 @@ struct fscache_cache_ops { /* check the consistency between the backing cache and the FS-Cache * cookie */ - bool (*check_consistency)(struct fscache_operation *op); + int (*check_consistency)(struct fscache_operation *op); /* store the updated auxiliary data on an object */ void (*update_object)(struct fscache_object *object); -- cgit v1.3-6-gb490 From 5a5e78cd706ce0577ed6356634a34552c866cc10 Mon Sep 17 00:00:00 2001 From: Caesar Wang Date: Wed, 18 May 2016 23:01:39 +0800 Subject: thermal: add the note for set_trip_temp Fixes commit 60f9ce3ada53 ("thermal: of-thermal: allow setting trip_temp on hardware") Signed-off-by: Caesar Wang Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e45abe7db9a6..ee517bef0db0 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -335,6 +335,8 @@ struct thermal_genl_event { * @get_trend: a pointer to a function that reads the sensor temperature trend. * @set_emul_temp: a pointer to a function that sets sensor emulated * temperature. + * @set_trip_temp: a pointer to a function that sets the trip temperature on + * hardware. */ struct thermal_zone_of_device_ops { int (*get_temp)(void *, int *); -- cgit v1.3-6-gb490 From dfc2507b26af22b0bbc85251b8545b36d8bc5d72 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Jun 2016 11:53:26 -0700 Subject: time: Make settimeofday error checking work again In commit 86d3473224b0 some of the checking for a valid timeval was subtley changed which caused -EINVAL to be returned whenever the timeval was null. However, it is possible to set the timezone data while specifying a NULL timeval, which is usually done to handle systems where the RTC keeps local time instead of UTC. Thus the patch causes such systems to have the time incorrectly set. This patch addresses the issue by handling the error conditionals in the same way as was done previously. Fixes: 86d3473224b0 "time: Introduce do_sys_settimeofday64()" Reported-by: Mika Westerberg Signed-off-by: John Stultz Tested-by: Mika Westerberg Cc: Prarit Bhargava Cc: Arnd Bergmann Cc: Baolin Wang Cc: Richard Cochran Cc: Shuah Khan Link: http://lkml.kernel.org/r/1464807207-16530-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 37dbacf84849..816b7543f81b 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -21,6 +21,9 @@ static inline int do_sys_settimeofday(const struct timespec *tv, struct timespec64 ts64; if (!tv) + return do_sys_settimeofday64(NULL, tz); + + if (!timespec_valid(tv)) return -EINVAL; ts64 = timespec_to_timespec64(*tv); -- cgit v1.3-6-gb490 From b9610e74586fd183b2d1c7fe5316bce8b6cc534f Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 1 Jun 2016 14:56:57 -0700 Subject: clk: Remove CLK_IS_ROOT flag Now that we've gotten rid of all the users of this flag we can retire the number, leaving a slot open for a future flag user. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0c72204c75fc..fb39d5add173 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -25,7 +25,7 @@ #define CLK_SET_PARENT_GATE BIT(1) /* must be gated across re-parent */ #define CLK_SET_RATE_PARENT BIT(2) /* propagate rate change up one level */ #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ -#define CLK_IS_ROOT BIT(4) /* Deprecated: Don't use */ + /* unused */ #define CLK_IS_BASIC BIT(5) /* Basic clk, can't do a to_clk_foo() */ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ -- cgit v1.3-6-gb490 From 12b7db2bf8b88938798c60416172b53225207b1f Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 31 May 2016 10:05:19 -0500 Subject: usb: musb: Return error value from musb_mailbox At least on n900 we have phy-twl4030-usb only generating cable interrupts, and then have a separate USB PHY. In order for musb to know the real cable status, we need to clear any cached state until musb is ready. Otherwise the cable status interrupts will get just ignored if the status does not change from the initial state. To do this, let's add a return value to musb_mailbox(), and reset cached linkstat to MUSB_UNKNOWN on error. Sorry to cause a bit of churn here, I should have added that already last time patching musb_mailbox(). Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-twl4030-usb.c | 14 ++++++++++---- drivers/usb/musb/musb_core.c | 7 ++++--- drivers/usb/musb/musb_core.h | 2 +- drivers/usb/musb/omap2430.c | 8 +++++--- drivers/usb/phy/phy-twl6030-usb.c | 12 +++++++++--- include/linux/usb/musb.h | 5 +++-- 6 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 6b6af6cba454..d9b10a39a2cf 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -463,7 +463,8 @@ static int twl4030_phy_power_on(struct phy *phy) twl4030_usb_set_mode(twl, twl->usb_mode); if (twl->usb_mode == T2_USB_MODE_ULPI) twl4030_i2c_access(twl, 0); - schedule_delayed_work(&twl->id_workaround_work, 0); + twl->linkstat = MUSB_UNKNOWN; + schedule_delayed_work(&twl->id_workaround_work, HZ); return 0; } @@ -537,6 +538,7 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) struct twl4030_usb *twl = _twl; enum musb_vbus_id_status status; bool status_changed = false; + int err; status = twl4030_usb_linkstat(twl); @@ -567,7 +569,9 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl) pm_runtime_mark_last_busy(twl->dev); pm_runtime_put_autosuspend(twl->dev); } - musb_mailbox(status); + err = musb_mailbox(status); + if (err) + twl->linkstat = MUSB_UNKNOWN; } /* don't schedule during sleep - irq works right then */ @@ -595,7 +599,8 @@ static int twl4030_phy_init(struct phy *phy) struct twl4030_usb *twl = phy_get_drvdata(phy); pm_runtime_get_sync(twl->dev); - schedule_delayed_work(&twl->id_workaround_work, 0); + twl->linkstat = MUSB_UNKNOWN; + schedule_delayed_work(&twl->id_workaround_work, HZ); pm_runtime_mark_last_busy(twl->dev); pm_runtime_put_autosuspend(twl->dev); @@ -763,7 +768,8 @@ static int twl4030_usb_remove(struct platform_device *pdev) if (cable_present(twl->linkstat)) pm_runtime_put_noidle(twl->dev); pm_runtime_mark_last_busy(twl->dev); - pm_runtime_put_sync_suspend(twl->dev); + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_put_sync(twl->dev); pm_runtime_disable(twl->dev); /* autogate 60MHz ULPI clock, diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 23888d579e8b..6469eff4fc30 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1679,7 +1679,7 @@ EXPORT_SYMBOL_GPL(musb_dma_completion); #define use_dma 0 #endif -static void (*musb_phy_callback)(enum musb_vbus_id_status status); +static int (*musb_phy_callback)(enum musb_vbus_id_status status); /* * musb_mailbox - optional phy notifier function @@ -1688,11 +1688,12 @@ static void (*musb_phy_callback)(enum musb_vbus_id_status status); * Optionally gets called from the USB PHY. Note that the USB PHY must be * disabled at the point the phy_callback is registered or unregistered. */ -void musb_mailbox(enum musb_vbus_id_status status) +int musb_mailbox(enum musb_vbus_id_status status) { if (musb_phy_callback) - musb_phy_callback(status); + return musb_phy_callback(status); + return -ENODEV; }; EXPORT_SYMBOL_GPL(musb_mailbox); diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 29473846b098..b55a776b03eb 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -215,7 +215,7 @@ struct musb_platform_ops { dma_addr_t *dma_addr, u32 *len); void (*pre_root_reset_end)(struct musb *musb); void (*post_root_reset_end)(struct musb *musb); - void (*phy_callback)(enum musb_vbus_id_status status); + int (*phy_callback)(enum musb_vbus_id_status status); }; /* diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index d312d42592d6..2c54f52ae386 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -180,22 +180,24 @@ static void omap2430_set_power(struct musb *musb, bool enabled, bool cable) } } -static void omap2430_musb_mailbox(enum musb_vbus_id_status status) +static int omap2430_musb_mailbox(enum musb_vbus_id_status status) { struct omap2430_glue *glue = _glue; if (!glue) { pr_err("%s: musb core is not yet initialized\n", __func__); - return; + return -EPROBE_DEFER; } glue->status = status; if (!glue_to_musb(glue)) { pr_err("%s: musb core is not yet ready\n", __func__); - return; + return -EPROBE_DEFER; } schedule_work(&glue->omap_musb_mailbox_work); + + return 0; } static void omap_musb_set_mailbox(struct omap2430_glue *glue) diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index 24e2b3cf1867..c66a447c3dfe 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -227,12 +227,16 @@ static irqreturn_t twl6030_usb_irq(int irq, void *_twl) twl->asleep = 1; status = MUSB_VBUS_VALID; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; } else { if (twl->linkstat != MUSB_UNKNOWN) { status = MUSB_VBUS_OFF; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; if (twl->asleep) { regulator_disable(twl->usb3v3); twl->asleep = 0; @@ -264,7 +268,9 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl) twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_SET); status = MUSB_ID_GROUND; twl->linkstat = status; - musb_mailbox(status); + ret = musb_mailbox(status); + if (ret) + twl->linkstat = MUSB_UNKNOWN; } else { twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_CLR); twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET); diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index 0b3da40a525e..d315c8907869 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -142,10 +142,11 @@ enum musb_vbus_id_status { }; #if IS_ENABLED(CONFIG_USB_MUSB_HDRC) -void musb_mailbox(enum musb_vbus_id_status status); +int musb_mailbox(enum musb_vbus_id_status status); #else -static inline void musb_mailbox(enum musb_vbus_id_status status) +static inline int musb_mailbox(enum musb_vbus_id_status status) { + return 0; } #endif -- cgit v1.3-6-gb490 From dd5f1b049dc139876801db3cdd0f20d21fd428cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 2 Jun 2016 09:00:28 +0100 Subject: irqchip/gic-v3: Fix ICC_SGI1R_EL1.INTID decoding mask The INTID mask is wrong, and is made a signed value, which has nteresting effects in the KVM emulation. Let's sanitize it. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index bfbd707de390..85a8c2acdef5 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -305,7 +305,7 @@ #define ICC_SGI1R_AFFINITY_1_SHIFT 16 #define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_SGI_ID_SHIFT 24 -#define ICC_SGI1R_SGI_ID_MASK (0xff << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) #define ICC_SGI1R_AFFINITY_2_SHIFT 32 #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 -- cgit v1.3-6-gb490 From fab0cdc30d81694d2d5524b24e42c43414971719 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 12 May 2016 10:46:34 +0200 Subject: irqchip/gic-v3: Fix copy+paste mistakes in defines ICC_SGI1R_AFFINITY_{2,3}_MASK are unused, which is good because they were defined with the wrong shifts. Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 85a8c2acdef5..dc493e0f0ff7 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -307,10 +307,10 @@ #define ICC_SGI1R_SGI_ID_SHIFT 24 #define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) #define ICC_SGI1R_AFFINITY_2_SHIFT 32 -#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 #define ICC_SGI1R_AFFINITY_3_SHIFT 48 -#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) #include -- cgit v1.3-6-gb490 From 9bd616e3dbedfc103f158197c8ad93678849b1ed Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 Jun 2016 18:52:16 +0100 Subject: cpuidle: Do not access cpuidle_devices when !CONFIG_CPU_IDLE The cpuidle_devices per-CPU variable is only defined when CPU_IDLE is enabled. Commit c8cc7d4de7a4 ("sched/idle: Reorganize the idle loop") removed the #ifdef CONFIG_CPU_IDLE around cpuidle_idle_call() with the compiler optimising away __this_cpu_read(cpuidle_devices). However, with CONFIG_UBSAN && !CONFIG_CPU_IDLE, this optimisation no longer happens and the kernel fails to link since cpuidle_devices is not defined. This patch introduces an accessor function for the current CPU cpuidle device (returning NULL when !CONFIG_CPU_IDLE) and uses it in cpuidle_idle_call(). Signed-off-by: Catalin Marinas Cc: 4.5+ # 4.5+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 3 +++ kernel/sched/idle.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 786ad32631a6..07b83d32f66c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +static inline struct cpuidle_device *cpuidle_get_device(void) +{return __this_cpu_read(cpuidle_devices); } #else static inline void disable_cpuidle(void) { } static inline bool cpuidle_not_available(struct cpuidle_driver *drv, @@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } +static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index bd12c6c714ec..c5aeedf4e93a 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -127,7 +127,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ static void cpuidle_idle_call(void) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; -- cgit v1.3-6-gb490 From 55eed755c6e30a89be3a791a6b0ad208aadd9bdc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 May 2016 13:11:17 +0200 Subject: locking/seqcount: Re-fix raw_read_seqcount_latch() Commit 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") broke raw_read_seqcount_latch(). If you look at the comment that was modified; the thing that changes is the seq count, not the latch pointer. * void latch_modify(struct latch_struct *latch, ...) * { * smp_wmb(); <- Ensure that the last data[1] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[0], ...); * * smp_wmb(); <- Ensure that the data[0] update is visible * latch->seq++; * smp_wmb(); <- Ensure that the seqcount update is visible * * modify(latch->data[1], ...); * } * * The query will have a form like: * * struct entry *latch_query(struct latch_struct *latch, ...) * { * struct entry *entry; * unsigned seq, idx; * * do { * seq = lockless_dereference(latch->seq); So here we have: seq = READ_ONCE(latch->seq); smp_read_barrier_depends(); Which is exactly what we want; the new code: seq = ({ p = READ_ONCE(latch); smp_read_barrier_depends(); p })->seq; is just wrong; because it looses the volatile read on seq, which can now be torn or worse 'optimized'. And the read_depend barrier is also placed wrong, we want it after the load of seq, to match the above data[] up-to-date wmb()s. Such that when we dereference latch->data[] below, we're guaranteed to observe the right data. * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * * smp_rmb(); * } while (seq != latch->seq); * * return entry; * } So yes, not passing a pointer is not pretty, but the code was correct, and isn't anymore now. Change to explicit READ_ONCE()+smp_read_barrier_depends() to avoid confusion and allow strict lockless_dereference() checking. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 50755bc1c305 ("seqlock: fix raw_read_seqcount_latch()") Link: http://lkml.kernel.org/r/20160527111117.GL3192@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/seqlock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7973a821ac58..ead97654c4e9 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -277,7 +277,10 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) static inline int raw_read_seqcount_latch(seqcount_t *s) { - return lockless_dereference(s)->sequence; + int seq = READ_ONCE(s->sequence); + /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ + smp_read_barrier_depends(); + return seq; } /** @@ -331,7 +334,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * unsigned seq, idx; * * do { - * seq = lockless_dereference(latch)->seq; + * seq = raw_read_seqcount_latch(&latch->seq); * * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); -- cgit v1.3-6-gb490 From 55f1ea15216a5a14c96738bd5284100a00ffa9dc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 31 May 2016 11:23:43 +0100 Subject: efi: Fix for_each_efi_memory_desc_in_map() for empty memmaps Commit: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") introduced a regression for systems booted with the 'noefi' kernel option. In particular, I observed an early kernel hang in efi_find_mirror()'s for_each_efi_memory_desc() call. As we don't have efi memmap on this system we enter this iterator with the following parameters: efi.memmap.map = 0, efi.memmap.map_end = 0, efi.memmap.desc_size = 28 ... then for_each_efi_memory_desc_in_map() does the following comparison: (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); ... where md = 0, (m)->map_end = 0 and (m)->desc_size = 28 but when we subtract something from a NULL pointer wrap around happens and we end up returning invalid pointer and crash. Fix it by using the correct pointer arithmetics. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: K. Y. Srinivasan Cc: Linus Torvalds Cc: Mark Salter Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 78ce248faa3c ("efi: Iterate over efi.memmap in for_each_efi_memory_desc()") Link: http://lkml.kernel.org/r/1464690224-4503-2-git-send-email-matt@codeblueprint.co.uk [ Made the changelog more readable. ] Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c2db3ca22217..f196dd0b0f2f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1005,7 +1005,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ - (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \ + ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ (md) = (void *)(md) + (m)->desc_size) /** -- cgit v1.3-6-gb490 From e93aeeae0bf8cba43e05484ae4ad17213ac6c3a7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 25 May 2016 01:15:04 +0000 Subject: of: add missing const for of_parse_phandle_with_args() in !CONFIG_OF commit 93c667ca2598bd84f1bd3f2fa176af69707699fe ("of: *node argument to of_parse_phandle_with_args should be const") changed to const for struct device node *np, but it cares CONFIG_OF case only, !CONFIG_OF case need it too. Signed-off-by: Kuninori Morimoto Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index c7292e8ea080..74eb28cadbef 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -614,7 +614,7 @@ static inline struct device_node *of_parse_phandle(const struct device_node *np, return NULL; } -static inline int of_parse_phandle_with_args(struct device_node *np, +static inline int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name, int index, -- cgit v1.3-6-gb490 From f86e4271978bd93db466d6a95dad4b0fdcdb04f6 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 3 Jun 2016 14:55:38 -0700 Subject: mm: check the return value of lookup_page_ext for all call sites Per the discussion with Joonsoo Kim [1], we need check the return value of lookup_page_ext() for all call sites since it might return NULL in some cases, although it is unlikely, i.e. memory hotplug. Tested with ltp with "page_owner=0". [1] http://lkml.kernel.org/r/20160519002809.GA10245@js1304-P5Q-DELUXE [akpm@linux-foundation.org: fix build-breaking typos] [arnd@arndb.de: fix build problems from lookup_page_ext] Link: http://lkml.kernel.org/r/6285269.2CksypHdYp@wuerfel [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1464023768-31025-1-git-send-email-yang.shi@linaro.org Signed-off-by: Yang Shi Signed-off-by: Arnd Bergmann Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_idle.h | 43 ++++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 6 ++++++ mm/page_owner.c | 26 ++++++++++++++++++++++++++ mm/page_poison.c | 8 +++++++- mm/vmstat.c | 2 ++ 5 files changed, 77 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index bf268fa92c5b..fec40271339f 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -46,33 +46,62 @@ extern struct page_ext_operations page_idle_ops; static inline bool page_is_young(struct page *page) { - return test_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline void set_page_young(struct page *page) { - set_bit(PAGE_EXT_YOUNG, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline bool test_and_clear_page_young(struct page *page) { - return test_and_clear_bit(PAGE_EXT_YOUNG, - &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags); } static inline bool page_is_idle(struct page *page) { - return test_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return false; + + return test_bit(PAGE_EXT_IDLE, &page_ext->flags); } static inline void set_page_idle(struct page *page) { - set_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + set_bit(PAGE_EXT_IDLE, &page_ext->flags); } static inline void clear_page_idle(struct page *page) { - clear_bit(PAGE_EXT_IDLE, &lookup_page_ext(page)->flags); + struct page_ext *page_ext = lookup_page_ext(page); + + if (unlikely(!page_ext)) + return; + + clear_bit(PAGE_EXT_IDLE, &page_ext->flags); } #endif /* CONFIG_64BIT */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc435ee..d27e8b968ac3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); set_page_private(page, 0); diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56da13d8..c6cda3e36212 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i); + if (unlikely(!page_ext)) + continue; __clear_bit(PAGE_EXT_OWNER, &page_ext->flags); } } @@ -62,6 +64,7 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); + struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries), @@ -69,6 +72,9 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) .skip = 3, }; + if (unlikely(!page_ext)) + return; + save_stack_trace(&trace); page_ext->order = order; @@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + /* + * The caller just returns 0 if no valid gfp + * So return 0 here too. + */ + return 0; return page_ext->gfp_mask; } @@ -99,6 +113,9 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) struct page_ext *new_ext = lookup_page_ext(newpage); int i; + if (unlikely(!old_ext || !new_ext)) + return; + new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries; @@ -193,6 +210,11 @@ void __dump_page_owner(struct page *page) gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask); + if (unlikely(!page_ext)) { + pr_alert("There is not page extension available.\n"); + return; + } + if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return; @@ -251,6 +273,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; /* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +341,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; /* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fad2446..2e647c65916b 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } @@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } @@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); - if (!page_ext) + if (unlikely(!page_ext)) return false; return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef388c2..cb2a67bb4158 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; -- cgit v1.3-6-gb490 From eedf265aa003b4781de24cfed40a655a664457e6 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 2 Jun 2016 10:29:47 -0500 Subject: devpts: Make each mount of devpts an independent filesystem. The /dev/ptmx device node is changed to lookup the directory entry "pts" in the same directory as the /dev/ptmx device node was opened in. If there is a "pts" entry and that entry is a devpts filesystem /dev/ptmx uses that filesystem. Otherwise the open of /dev/ptmx fails. The DEVPTS_MULTIPLE_INSTANCES configuration option is removed, so that userspace can now safely depend on each mount of devpts creating a new instance of the filesystem. Each mount of devpts is now a separate and equal filesystem. Reserved ttys are now available to all instances of devpts where the mounter is in the initial mount namespace. A new vfs helper path_pts is introduced that finds a directory entry named "pts" in the directory of the passed in path, and changes the passed in path to point to it. The helper path_pts uses a function path_parent_directory that was factored out of follow_dotdot. In the implementation of devpts: - devpts_mnt is killed as it is no longer meaningful if all mounts of devpts are equal. - pts_sb_from_inode is replaced by just inode->i_sb as all cached inodes in the tty layer are now from the devpts filesystem. - devpts_add_ref is rolled into the new function devpts_ptmx. And the unnecessary inode hold is removed. - devpts_del_ref is renamed devpts_release and reduced to just a deacrivate_super. - The newinstance mount option continues to be accepted but is now ignored. In devpts_fs.h definitions for when !CONFIG_UNIX98_PTYS are removed as they are never used. Documentation/filesystems/devices.txt is updated to describe the current situation. This has been verified to work properly on openwrt-15.05, centos5, centos6, centos7, debian-6.0.2, debian-7.9, debian-8.2, ubuntu-14.04.3, ubuntu-15.10, fedora23, magia-5, mint-17.3, opensuse-42.1, slackware-14.1, gentoo-20151225 (13.0?), archlinux-2015-12-01. With the caveat that on centos6 and on slackware-14.1 that there wind up being two instances of the devpts filesystem mounted on /dev/pts, the lower copy does not end up getting used. Signed-off-by: "Eric W. Biederman" Cc: Greg KH Cc: Peter Hurley Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: One Thousand Gnomes Cc: Jann Horn Cc: Jiri Slaby Cc: Florian Weimer Cc: Konstantin Khlebnikov Signed-off-by: Linus Torvalds --- Documentation/filesystems/devpts.txt | 145 +++----------------------- drivers/tty/Kconfig | 11 -- drivers/tty/pty.c | 15 +-- fs/devpts/inode.c | 191 ++++++++++------------------------- fs/namei.c | 49 +++++++-- include/linux/devpts_fs.h | 9 +- include/linux/namei.h | 2 + 7 files changed, 126 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt index 30d2fcb32f72..9f94fe276dea 100644 --- a/Documentation/filesystems/devpts.txt +++ b/Documentation/filesystems/devpts.txt @@ -1,141 +1,26 @@ +Each mount of the devpts filesystem is now distinct such that ptys +and their indicies allocated in one mount are independent from ptys +and their indicies in all other mounts. -To support containers, we now allow multiple instances of devpts filesystem, -such that indices of ptys allocated in one instance are independent of indices -allocated in other instances of devpts. +All mounts of the devpts filesystem now create a /dev/pts/ptmx node +with permissions 0000. -To preserve backward compatibility, this support for multiple instances is -enabled only if: +To retain backwards compatibility the a ptmx device node (aka any node +created with "mknod name c 5 2") when opened will look for an instance +of devpts under the name "pts" in the same directory as the ptmx device +node. - - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and - - '-o newinstance' mount option is specified while mounting devpts - -IOW, devpts now supports both single-instance and multi-instance semantics. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and -this referred to as the "legacy" mode. In this mode, the new mount options -(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message -on console. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the -'newinstance' option (as in current start-up scripts) the new mount binds -to the initial kernel mount of devpts. This mode is referred to as the -'single-instance' mode and the current, single-instance semantics are -preserved, i.e PTYs are common across the system. - -The only difference between this single-instance mode and the legacy mode -is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which -can safely be ignored. - -If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified, -the mount is considered to be in the multi-instance mode and a new instance -of the devpts fs is created. Any ptys created in this instance are independent -of ptys in other instances of devpts. Like in the single-instance mode, the -/dev/pts/ptmx node is present. To effectively use the multi-instance mode, -open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or -bind-mount. - -Eg: A container startup script could do the following: - - $ chmod 0666 /dev/pts/ptmx - $ rm /dev/ptmx - $ ln -s pts/ptmx /dev/ptmx - $ ns_exec -cm /bin/bash - - # We are now in new container - - $ umount /dev/pts - $ mount -t devpts -o newinstance lxcpts /dev/pts - $ sshd -p 1234 - -where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs -/bin/bash in the child process. A pty created by the sshd is not visible in -the original mount of /dev/pts. +As an option instead of placing a /dev/ptmx device node at /dev/ptmx +it is possible to place a symlink to /dev/pts/ptmx at /dev/ptmx or +to bind mount /dev/ptx/ptmx to /dev/ptmx. If you opt for using +the devpts filesystem in this manner devpts should be mounted with +the ptmxmode=0666, or chmod 0666 /dev/pts/ptmx should be called. Total count of pty pairs in all instances is limited by sysctls: kernel.pty.max = 4096 - global limit -kernel.pty.reserve = 1024 - reserve for initial instance +kernel.pty.reserve = 1024 - reserved for filesystems mounted from the initial mount namespace kernel.pty.nr - current count of ptys Per-instance limit could be set by adding mount option "max=". This feature was added in kernel 3.4 together with sysctl kernel.pty.reserve. In kernels older than 3.4 sysctl kernel.pty.max works as per-instance limit. - -User-space changes ------------------- - -In multi-instance mode (i.e '-o newinstance' mount option is specified at least -once), following user-space issues should be noted. - -1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored - and no change is needed to system-startup scripts. - -2. To effectively use multi-instance mode (i.e -o newinstance is specified) - administrators or startup scripts should "redirect" open of /dev/ptmx to - /dev/pts/ptmx using either a bind mount or symlink. - - $ mount -t devpts -o newinstance devpts /dev/pts - - followed by either - - $ rm /dev/ptmx - $ ln -s pts/ptmx /dev/ptmx - $ chmod 666 /dev/pts/ptmx - or - $ mount -o bind /dev/pts/ptmx /dev/ptmx - -3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it - enables better error-reporting and treats both single-instance and - multi-instance mounts similarly. - - But this method requires that system-startup scripts set the mode of - /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the - mode by, either - - - adding ptmxmode mount option to devpts entry in /etc/fstab, or - - using 'chmod 0666 /dev/pts/ptmx' - -4. If multi-instance mode mount is needed for containers, but the system - startup scripts have not yet been updated, container-startup scripts - should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single- - instance mounts. - - Or, in general, container-startup scripts should use: - - mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts - if [ ! -L /dev/ptmx ]; then - mount -o bind /dev/pts/ptmx /dev/ptmx - fi - - When all devpts mounts are multi-instance, /dev/ptmx can permanently be - a symlink to pts/ptmx and the bind mount can be ignored. - -5. A multi-instance mount that is not accompanied by the /dev/ptmx to - /dev/pts/ptmx redirection would result in an unusable/unreachable pty. - - mount -t devpts -o newinstance lxcpts /dev/pts - - immediately followed by: - - open("/dev/ptmx") - - would create a pty, say /dev/pts/7, in the initial kernel mount. - But /dev/pts/7 would be invisible in the new mount. - -6. The permissions for /dev/pts/ptmx node should be specified when mounting - /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000). - - mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts - - The permissions can be later be changed as usual with 'chmod'. - - chmod 666 /dev/pts/ptmx - -7. A mount of devpts without the 'newinstance' option results in binding to - initial kernel mount. This behavior while preserving legacy semantics, - does not provide strict isolation in a container environment. i.e by - mounting devpts without the 'newinstance' option, a container could - get visibility into the 'host' or root container's devpts. - - To workaround this and have strict isolation, all mounts of devpts, - including the mount in the root container, should use the newinstance - option. diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 82c4d2e45319..95103054c0e4 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -120,17 +120,6 @@ config UNIX98_PTYS All modern Linux systems use the Unix98 ptys. Say Y unless you're on an embedded system and want to conserve memory. -config DEVPTS_MULTIPLE_INSTANCES - bool "Support multiple instances of devpts" - depends on UNIX98_PTYS - default n - ---help--- - Enable support for multiple instances of devpts filesystem. - If you want to have isolated PTY namespaces (eg: in containers), - say Y here. Otherwise, say N. If enabled, each mount of devpts - filesystem with the '-o newinstance' option will create an - independent PTY namespace. - config LEGACY_PTYS bool "Legacy (BSD) PTY support" default y diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index dd4b8417e7f4..f856c4544eea 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -668,7 +668,7 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) else fsi = tty->link->driver_data; devpts_kill_index(fsi, tty->index); - devpts_put_ref(fsi); + devpts_release(fsi); } static const struct tty_operations ptm_unix98_ops = { @@ -733,10 +733,11 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_get_ref(inode, filp); - retval = -ENODEV; - if (!fsi) + fsi = devpts_acquire(filp); + if (IS_ERR(fsi)) { + retval = PTR_ERR(fsi); goto out_free_file; + } /* find a device that is not in use. */ mutex_lock(&devpts_mutex); @@ -745,7 +746,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) retval = index; if (index < 0) - goto out_put_ref; + goto out_put_fsi; mutex_lock(&tty_mutex); @@ -789,8 +790,8 @@ err_release: return retval; out: devpts_kill_index(fsi, index); -out_put_ref: - devpts_put_ref(fsi); +out_put_fsi: + devpts_release(fsi); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 0b2954d7172d..37c134a132c7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = { static DEFINE_MUTEX(allocated_ptys_lock); -static struct vfsmount *devpts_mnt; - struct pts_mount_opts { int setuid; int setgid; @@ -104,7 +102,7 @@ struct pts_mount_opts { kgid_t gid; umode_t mode; umode_t ptmxmode; - int newinstance; + int reserve; int max; }; @@ -117,11 +115,9 @@ static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_mode, "mode=%o"}, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES {Opt_ptmxmode, "ptmxmode=%o"}, {Opt_newinstance, "newinstance"}, {Opt_max, "max=%d"}, -#endif {Opt_err, NULL} }; @@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -static inline struct super_block *pts_sb_from_inode(struct inode *inode) +struct pts_fs_info *devpts_acquire(struct file *filp) { -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES - if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) - return inode->i_sb; -#endif - if (!devpts_mnt) - return NULL; - return devpts_mnt->mnt_sb; + struct pts_fs_info *result; + struct path path; + struct super_block *sb; + int err; + + path = filp->f_path; + path_get(&path); + + /* Has the devpts filesystem already been found? */ + sb = path.mnt->mnt_sb; + if (sb->s_magic != DEVPTS_SUPER_MAGIC) { + /* Is a devpts filesystem at "pts" in the same directory? */ + err = path_pts(&path); + if (err) { + result = ERR_PTR(err); + goto out; + } + + /* Is the path the root of a devpts filesystem? */ + result = ERR_PTR(-ENODEV); + sb = path.mnt->mnt_sb; + if ((sb->s_magic != DEVPTS_SUPER_MAGIC) || + (path.mnt->mnt_root != sb->s_root)) + goto out; + } + + /* + * pty code needs to hold extra references in case of last /dev/tty close + */ + atomic_inc(&sb->s_active); + result = DEVPTS_SB(sb); + +out: + path_put(&path); + return result; +} + +void devpts_release(struct pts_fs_info *fsi) +{ + deactivate_super(fsi->sb); } #define PARSE_MOUNT 0 @@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode) /* * parse_mount_options(): * Set @opts to mount options specified in @data. If an option is not - * specified in @data, set it to its default value. The exception is - * 'newinstance' option which can only be set/cleared on a mount (i.e. - * cannot be changed during remount). + * specified in @data, set it to its default value. * * Note: @data may be NULL (in which case all options are set to default). */ @@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE; opts->max = NR_UNIX98_PTY_MAX; - /* newinstance makes sense only on initial mount */ + /* Only allow instances mounted from the initial mount + * namespace to tap the reserve pool of ptys. + */ if (op == PARSE_MOUNT) - opts->newinstance = 0; + opts->reserve = + (current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns); while ((p = strsep(&data, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->mode = option & S_IALLUGO; break; -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES case Opt_ptmxmode: if (match_octal(&args[0], &option)) return -EINVAL; opts->ptmxmode = option & S_IALLUGO; break; case Opt_newinstance: - /* newinstance makes sense only on initial mount */ - if (op == PARSE_MOUNT) - opts->newinstance = 1; break; case Opt_max: if (match_int(&args[0], &option) || @@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return -EINVAL; opts->max = option; break; -#endif default: pr_err("called with bogus options\n"); return -EINVAL; @@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts) return 0; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES static int mknod_ptmx(struct super_block *sb) { int mode; @@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi) inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } } -#else -static inline void update_ptmx_mode(struct pts_fs_info *fsi) -{ - return; -} -#endif static int devpts_remount(struct super_block *sb, int *flags, char *data) { @@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, opts->gid)); seq_printf(seq, ",mode=%03o", opts->mode); -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode); if (opts->max < NR_UNIX98_PTY_MAX) seq_printf(seq, ",max=%d", opts->max); -#endif return 0; } @@ -410,40 +426,11 @@ fail: return -ENOMEM; } -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES -static int compare_init_pts_sb(struct super_block *s, void *p) -{ - if (devpts_mnt) - return devpts_mnt->mnt_sb == s; - return 0; -} - /* * devpts_mount() * - * If the '-o newinstance' mount option was specified, mount a new - * (private) instance of devpts. PTYs created in this instance are - * independent of the PTYs in other devpts instances. - * - * If the '-o newinstance' option was not specified, mount/remount the - * initial kernel mount of devpts. This type of mount gives the - * legacy, single-instance semantics. - * - * The 'newinstance' option is needed to support multiple namespace - * semantics in devpts while preserving backward compatibility of the - * current 'single-namespace' semantics. i.e all mounts of devpts - * without the 'newinstance' mount option should bind to the initial - * kernel mount, like mount_single(). - * - * Mounts with 'newinstance' option create a new, private namespace. - * - * NOTE: - * - * For single-mount semantics, devpts cannot use mount_single(), - * because mount_single()/sget() find and use the super-block from - * the most recent mount of devpts. But that recent mount may be a - * 'newinstance' mount and mount_single() would pick the newinstance - * super-block instead of the initial super-block. + * Mount a new (private) instance of devpts. PTYs created in this + * instance are independent of the PTYs in other devpts instances. */ static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, if (error) return ERR_PTR(error); - /* Require newinstance for all user namespace mounts to ensure - * the mount options are not changed. - */ - if ((current_user_ns() != &init_user_ns) && !opts.newinstance) - return ERR_PTR(-EINVAL); - - if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, flags, NULL); - else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, - NULL); - + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); @@ -491,18 +467,6 @@ out_undo_sget: return ERR_PTR(error); } -#else -/* - * This supports only the legacy single-instance semantics (no - * multiple-instance semantics) - */ -static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) -{ - return mount_single(fs_type, flags, data, devpts_fill_super); -} -#endif - static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); @@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = { .name = "devpts", .mount = devpts_mount, .kill_sb = devpts_kill_sb, -#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES .fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT, -#endif }; /* @@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi) int index; int ida_ret; - if (!fsi) - return -ENODEV; - retry: if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL)) return -ENOMEM; mutex_lock(&allocated_ptys_lock); - if (pty_count >= pty_limit - - (fsi->mount_opts.newinstance ? pty_reserve : 0)) { + if (pty_count >= (pty_limit - + (fsi->mount_opts.reserve ? 0 : pty_reserve))) { mutex_unlock(&allocated_ptys_lock); return -ENOSPC; } @@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx) mutex_unlock(&allocated_ptys_lock); } -/* - * pty code needs to hold extra references in case of last /dev/tty close - */ -struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file) -{ - struct super_block *sb; - struct pts_fs_info *fsi; - - sb = pts_sb_from_inode(ptmx_inode); - if (!sb) - return NULL; - fsi = DEVPTS_SB(sb); - if (!fsi) - return NULL; - - atomic_inc(&sb->s_active); - return fsi; -} - -void devpts_put_ref(struct pts_fs_info *fsi) -{ - deactivate_super(fsi->sb); -} - /** * devpts_pty_new -- create a new inode in /dev/pts/ * @ptmx_inode: inode of the master @@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi) struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) { struct dentry *dentry; - struct super_block *sb; + struct super_block *sb = fsi->sb; struct inode *inode; struct dentry *root; struct pts_mount_opts *opts; char s[12]; - if (!fsi) - return ERR_PTR(-ENODEV); - - sb = fsi->sb; root = sb->s_root; opts = &fsi->mount_opts; @@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry) static int __init init_devpts_fs(void) { int err = register_filesystem(&devpts_fs_type); - struct ctl_table_header *table; - if (!err) { - struct vfsmount *mnt; - - table = register_sysctl_table(pty_root_table); - mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(mnt)) { - err = PTR_ERR(mnt); - unregister_filesystem(&devpts_fs_type); - unregister_sysctl_table(table); - } else { - devpts_mnt = mnt; - } + register_sysctl_table(pty_root_table); } return err; } diff --git a/fs/namei.c b/fs/namei.c index 4c4f95ac8aa5..6a82fb7e2127 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1416,21 +1416,28 @@ static void follow_mount(struct path *path) } } +static int path_parent_directory(struct path *path) +{ + struct dentry *old = path->dentry; + /* rare case of legitimate dget_parent()... */ + path->dentry = dget_parent(path->dentry); + dput(old); + if (unlikely(!path_connected(path))) + return -ENOENT; + return 0; +} + static int follow_dotdot(struct nameidata *nd) { while(1) { - struct dentry *old = nd->path.dentry; - if (nd->path.dentry == nd->root.dentry && nd->path.mnt == nd->root.mnt) { break; } if (nd->path.dentry != nd->path.mnt->mnt_root) { - /* rare case of legitimate dget_parent()... */ - nd->path.dentry = dget_parent(nd->path.dentry); - dput(old); - if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + int ret = path_parent_directory(&nd->path); + if (ret) + return ret; break; } if (!follow_up(&nd->path)) @@ -2514,6 +2521,34 @@ struct dentry *lookup_one_len_unlocked(const char *name, } EXPORT_SYMBOL(lookup_one_len_unlocked); +#ifdef CONFIG_UNIX98_PTYS +int path_pts(struct path *path) +{ + /* Find something mounted on "pts" in the same directory as + * the input path. + */ + struct dentry *child, *parent; + struct qstr this; + int ret; + + ret = path_parent_directory(path); + if (ret) + return ret; + + parent = path->dentry; + this.name = "pts"; + this.len = 3; + child = d_hash_and_lookup(parent, &this); + if (!child) + return -ENOENT; + + path->dentry = child; + dput(parent); + follow_mount(path); + return 0; +} +#endif + int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 5871f292b596..277ab9af9ac2 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,13 +15,12 @@ #include -struct pts_fs_info; - #ifdef CONFIG_UNIX98_PTYS -/* Look up a pts fs info and get a ref to it */ -struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); -void devpts_put_ref(struct pts_fs_info *); +struct pts_fs_info; + +struct pts_fs_info *devpts_acquire(struct file *); +void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); void devpts_kill_index(struct pts_fs_info *, int); diff --git a/include/linux/namei.h b/include/linux/namei.h index ec5ec2818a28..d3d0398f2a1b 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -45,6 +45,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +extern int path_pts(struct path *path); + extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); static inline int user_path_at(int dfd, const char __user *name, unsigned flags, -- cgit v1.3-6-gb490 From d3ae2bdeba9bad8cb95301451aeaf03ce31e82f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:36 +0300 Subject: IB/mlx5: Fix pkey_index length in the QP path record Pkey index fields in the QP context path record are extended to 16 bits, as required by IB spec (version 1.3). This change affects all QP commands which include path records. To enable this change, moved the free adaptive routing flag bit (free_ar) to the most significant byte of the QP path record. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB ...') Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++++------- include/linux/mlx5/qp.h | 5 ++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 43c1441b6fb8..6b90bfdea830 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1859,7 +1859,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = attr->pkey_index; + path->pkey_index = cpu_to_be16(attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1879,9 +1879,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; } else { - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : - 0; + path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->fl_free_ar |= + (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; path->rlid = cpu_to_be16(ah->dlid); path->grh_mlid = ah->src_path_bits & 0x7f; if (ah->ah_flags & IB_AH_GRH) @@ -2266,7 +2266,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PKEY_INDEX) - context->pri_path.pkey_index = attr->pkey_index; + context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); /* todo implement counter_index functionality */ @@ -4015,11 +4015,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); - qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_pkey_index = + be16_to_cpu(context->alt_path.pkey_index); qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); qp_attr->port_num = context->pri_path.port; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..e4e29882fdfd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -460,10 +460,9 @@ struct mlx5_core_qp { }; struct mlx5_qp_path { - u8 fl; + u8 fl_free_ar; u8 rsvd3; - u8 free_ar; - u8 pkey_index; + __be16 pkey_index; u8 rsvd0; u8 grh_mlid; __be16 rlid; -- cgit v1.3-6-gb490 From 1607f09c226d1378439c411baaaa020042750338 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sun, 5 Jun 2016 23:14:14 +0200 Subject: coredump: fix dumping through pipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset in the core file used to be tracked with ->written field of the coredump_params structure. The field was retired in favour of file->f_pos. However, ->f_pos is not maintained for pipes which leads to breakage. Restore explicit tracking of the offset in coredump_params. Introduce ->pos field for this purpose since ->written was already reused. Fixes: a00839395103 ("get rid of coredump_params->written"). Reported-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Mateusz Guzik Reviewed-by: Omar Sandoval Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/coredump.c | 4 +++- include/linux/binfmts.h | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 84fb984f29c1..85c85eb3e245 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -172,7 +172,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (rc < 0) goto out; - skip = roundup(cprm->file->f_pos - total + sz, 4) - cprm->file->f_pos; + skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; out: diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22ef32f..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 576e4639ca60..314b3caa701c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -65,6 +65,7 @@ struct coredump_params { unsigned long limit; unsigned long mm_flags; loff_t written; + loff_t pos; }; /* -- cgit v1.3-6-gb490 From 7cfe749fad5158247282f2fee30773fd454029ab Mon Sep 17 00:00:00 2001 From: Tony Makkiel Date: Wed, 18 May 2016 17:22:45 +0100 Subject: leds: core: Fix brightness setting upon hardware blinking enabled Commit 76931edd54f8 ("leds: fix brightness changing when software blinking is active") changed the semantics of led_set_brightness() which according to the documentation should disable blinking upon any brightness setting. Moreover it made it different for soft blink case, where it was possible to change blink brightness, and for hardware blink case, where setting any brightness greater than 0 was ignored. While the change itself is against the documentation claims, it was driven also by the fact that timer trigger remained active after turning blinking off. Fixing that would have required major refactoring in the led-core, led-class, and led-triggers because of cyclic dependencies. Finally, it has been decided that allowing for brightness change during blinking is beneficial as it can be accomplished without disturbing blink rhythm. The change in brightness setting semantics will not affect existing LED class drivers that implement blink_set op thanks to the LED_BLINK_SW flag introduced by this patch. The flag state will be from now on checked in led_set_brightness() which will allow to distinguish between software and hardware blink mode. In the latter case the control will be passed directly to the drivers which apply their semantics on brightness set, which is disable the blinking in case of most such drivers. New drivers will apply new semantics and just change the brightness while hardware blinking is on, if possible. The issue was smuggled by subsequent LED core improvements, which modified the code that originally introduced the problem. Fixes: f1e80c07416a ("leds: core: Add two new LED_BLINK_ flags") Signed-off-by: Tony Makkiel Signed-off-by: Jacek Anaszewski --- Documentation/leds/leds-class.txt | 4 ++-- drivers/leds/led-core.c | 9 ++++++--- include/linux/leds.h | 23 ++++++++++++----------- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt index d406d98339b2..44f5e6bccd97 100644 --- a/Documentation/leds/leds-class.txt +++ b/Documentation/leds/leds-class.txt @@ -74,8 +74,8 @@ blink_set() function (see ). To set an LED to blinking, however, it is better to use the API function led_blink_set(), as it will check and implement software fallback if necessary. -To turn off blinking again, use the API function led_brightness_set() -as that will not just set the LED brightness but also stop any software +To turn off blinking, use the API function led_brightness_set() +with brightness value LED_OFF, which should stop any software timers that may have been required for blinking. The blink_set() function should choose a user friendly blinking value diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 3495d5d6547f..3bce44893021 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -53,11 +53,12 @@ static void led_timer_function(unsigned long data) if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) { led_set_brightness_nosleep(led_cdev, LED_OFF); + led_cdev->flags &= ~LED_BLINK_SW; return; } if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) { - led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP; + led_cdev->flags &= ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW); return; } @@ -151,6 +152,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev, return; } + led_cdev->flags |= LED_BLINK_SW; mod_timer(&led_cdev->blink_timer, jiffies + 1); } @@ -219,6 +221,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev) del_timer_sync(&led_cdev->blink_timer); led_cdev->blink_delay_on = 0; led_cdev->blink_delay_off = 0; + led_cdev->flags &= ~LED_BLINK_SW; } EXPORT_SYMBOL_GPL(led_stop_software_blink); @@ -226,10 +229,10 @@ void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness) { /* - * In case blinking is on delay brightness setting + * If software blink is active, delay brightness setting * until the next timer tick. */ - if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) { + if (led_cdev->flags & LED_BLINK_SW) { /* * If we need to disable soft blinking delegate this to the * work queue task to avoid problems in case we are called diff --git a/include/linux/leds.h b/include/linux/leds.h index d2b13066e781..e5e7f2e80a54 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -42,15 +42,16 @@ struct led_classdev { #define LED_UNREGISTERING (1 << 1) /* Upper 16 bits reflect control information */ #define LED_CORE_SUSPENDRESUME (1 << 16) -#define LED_BLINK_ONESHOT (1 << 17) -#define LED_BLINK_ONESHOT_STOP (1 << 18) -#define LED_BLINK_INVERT (1 << 19) -#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 20) -#define LED_BLINK_DISABLE (1 << 21) -#define LED_SYSFS_DISABLE (1 << 22) -#define LED_DEV_CAP_FLASH (1 << 23) -#define LED_HW_PLUGGABLE (1 << 24) -#define LED_PANIC_INDICATOR (1 << 25) +#define LED_BLINK_SW (1 << 17) +#define LED_BLINK_ONESHOT (1 << 18) +#define LED_BLINK_ONESHOT_STOP (1 << 19) +#define LED_BLINK_INVERT (1 << 20) +#define LED_BLINK_BRIGHTNESS_CHANGE (1 << 21) +#define LED_BLINK_DISABLE (1 << 22) +#define LED_SYSFS_DISABLE (1 << 23) +#define LED_DEV_CAP_FLASH (1 << 24) +#define LED_HW_PLUGGABLE (1 << 25) +#define LED_PANIC_INDICATOR (1 << 26) /* Set LED brightness level * Must not sleep. Use brightness_set_blocking for drivers @@ -72,8 +73,8 @@ struct led_classdev { * and if both are zero then a sensible default should be chosen. * The call should adjust the timings in that case and if it can't * match the values specified exactly. - * Deactivate blinking again when the brightness is set to a fixed - * value via the brightness_set() callback. + * Deactivate blinking again when the brightness is set to LED_OFF + * via the brightness_set() callback. */ int (*blink_set)(struct led_classdev *led_cdev, unsigned long *delay_on, -- cgit v1.3-6-gb490 From 5c1d3310d84309330264e2a06e5000eb289a44ad Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 7 Jun 2016 18:44:48 +0100 Subject: drivers: of: Fix of_pci.h header guard The compilation of of_pci.c is governed by CONFIG_OF_PCI, but the corresponding declarations in of_pci.h are inconsistently guarded by CONFIG_OF, with the result that if CONFIG_PCI is disabled for an OF platform, the dangling external declarations are still active and the inline stub definitions not. So far this has managed to go unnoticed since it happens that the only references to these functions are from code which itself depends on CONFIG_PCI or CONFIG_OF_PCI. Fix this with the appropriate config guard so that any new callers outside PCI-specific code don't start unexpectedly breaking under certain configs. Signed-off-by: Robin Murphy Signed-off-by: Rob Herring --- include/linux/of_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index f6e9e85164e8..b969e9443962 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -8,7 +8,7 @@ struct pci_dev; struct of_phandle_args; struct device_node; -#ifdef CONFIG_OF +#ifdef CONFIG_OF_PCI int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq); struct device_node *of_pci_find_child_device(struct device_node *parent, unsigned int devfn); -- cgit v1.3-6-gb490 From 06dfeef88573cf032e5c27e37f80ff5237b3318a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 11:38:34 +0100 Subject: drivers: of: add definition of early_init_dt_alloc_reserved_memory_arch The function early_init_dt_alloc_reserved_memory_arch is defined in drivers/of/of_reserved_mem.c but is not declared in any of the header files. Add the declaration of this to avoid the warning: drivers/of/of_reserved_mem.c:31:19: warning: symbol 'early_init_dt_alloc_reserved_memory_arch' was not declared. Should it be static? Signed-off-by: Ben Dooks [robh: drop extern from declaration] Signed-off-by: Rob Herring --- include/linux/of_reserved_mem.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index ad2f67054372..c201060e0c6d 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -31,6 +31,13 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem); int of_reserved_mem_device_init(struct device *dev); void of_reserved_mem_device_release(struct device *dev); +int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, + phys_addr_t align, + phys_addr_t start, + phys_addr_t end, + bool nomap, + phys_addr_t *res_base); + void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); -- cgit v1.3-6-gb490 From 418f8399a8bedf376ec13eb01088f04a76ebdd6f Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 10 Jun 2016 00:07:28 +0300 Subject: net/mlx5: Fix the size of modify QP mailbox Add 16 reserved bytes at the end of mlx5_modify_qp_mbox_in to match the hardware spec definition. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..1532dcf6fc5e 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -560,6 +560,7 @@ struct mlx5_modify_qp_mbox_in { __be32 optparam; u8 rsvd0[4]; struct mlx5_qp_context ctx; + u8 rsvd2[16]; }; struct mlx5_modify_qp_mbox_out { -- cgit v1.3-6-gb490 From 86d56a1a6b7352542661d8a9463758c7f285fce3 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Fri, 10 Jun 2016 00:07:30 +0300 Subject: net/mlx5: Fix MLX5_CMD_OP_MAX to be defined correctly Having MLX5_CMD_OP_MAX on another file causes us to repeatedly miss accounting new commands added to the driver and hence there're no entries for them in debugfs. To solve that, we integrate it into the commands enum as the last entry. Fixes: 34a40e689393 ('net/mlx5_core: Introduce modify flow table command') Signed-off-by: Shahar Klein Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 -- include/linux/mlx5/mlx5_ifc.h | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 035abdf62cfe..51f0caf299d8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1240,8 +1240,6 @@ struct mlx5_destroy_psv_out { u8 rsvd[8]; }; -#define MLX5_CMD_OP_MAX 0x920 - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9a05cd7e5890..986a615f623c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -205,7 +205,8 @@ enum { MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, - MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c + MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_MAX }; struct mlx5_ifc_flow_table_fields_supported_bits { -- cgit v1.3-6-gb490 From 876d634d19e41603aab91455f2c52a78a28372d5 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Fri, 10 Jun 2016 00:07:32 +0300 Subject: net/mlx5: Fix flow steering NIC capabilities check Flow steering infrastructure is currently used only on link layer ethernet, therefore the driver should initialize the flow steering when the device link layer is ethernet. In addition, add missing capability check before initializing the namespace of NIC RX flow tables. Fixes: 2530236303d9 ('net/mlx5_core: Flow steering tree initialization') Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++++++- include/linux/mlx5/device.h | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fa6fec1930f5..c1efa5517d17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev) void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + cleanup_root_ns(dev); cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); @@ -1828,15 +1831,20 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) { int err = 0; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + err = mlx5_init_fc_stats(dev); if (err) return err; - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) && + MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(dev); if (err) goto err; } + if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { err = init_fdb_root_ns(dev); if (err) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 51f0caf299d8..73a48479892d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1367,6 +1367,12 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_FLOWTABLE_NIC_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.cap) + +#define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) -- cgit v1.3-6-gb490 From 23898c763f4af6f5c80b0230b1ea788a0ce3cf73 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Fri, 10 Jun 2016 00:07:37 +0300 Subject: net/mlx5: E-Switch, Modify node guid on vf set MAC In RoCE, the RDMA-CM needs the node guid to establish connection between nodes. Today, the node guid exposed to mlx5 Ethernet VFs is zero, therefore RDMA-CM on the VF is broken. Whenever the administrator sets a MAC for a VF, derive the node guid from it and set it as well in the following way: MAC: e4:1d:2d:b3:f4:01 -> node_guid: e4:1d:2d:ff:fe:b3:f4:01 Fixes: 77256579c6b43 ('net/mlx5: E-Switch, Introduce Vport...') Signed-off-by: Noa Osherovich Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 23 ++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 38 +++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 9 ++++-- include/linux/mlx5/vport.h | 2 ++ 4 files changed, 68 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index cfec20cffd26..9b1855b199a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1725,11 +1725,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { - int err = 0; struct mlx5_vport *evport; + u64 node_guid; + int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1753,11 +1766,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + node_guid_gen_from_mac(&node_guid, mac); + err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid); + if (err) + mlx5_core_warn(esw->dev, + "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", + vport, err); + mutex_lock(&esw->state_lock); if (evport->enabled) err = esw_vport_ingress_config(esw, evport); mutex_unlock(&esw->state_lock); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b69dadcfb897..daf44cd4c566 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -508,6 +508,44 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_context; + u8 *guid; + void *in; + int err; + + if (!vport) + return -EINVAL; + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EACCES; + if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) + return -ENOTSUPP; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.node_guid, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + + nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, + node_guid); + MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} + int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 986a615f623c..e955a2859009 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -501,7 +501,9 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x1b]; + u8 reserved_at_5[0x19]; + u8 nic_vport_node_guid_modify[0x1]; + u8 nic_vport_port_guid_modify[0x1]; u8 reserved_at_20[0x7e0]; }; @@ -4584,7 +4586,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_at_0[0x19]; + u8 reserved_at_0[0x16]; + u8 node_guid[0x1]; + u8 port_guid[0x1]; + u8 reserved_at_18[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 301da4a5e6bf..6c16c198f680 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -50,6 +50,8 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); +int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, + u32 vport, u64 node_guid); int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, u16 *qkey_viol_cntr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, -- cgit v1.3-6-gb490 From ef2bf4997f7da6efa8540d9cf726c44bf2b863af Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 27 May 2016 09:45:49 -0700 Subject: pwm: Improve args checking in pwm_apply_state() It seems like in the process of refactoring pwm_config() to utilize the newly-introduced pwm_apply_state() API, some args/bounds checking was dropped. In particular, I noted that we are now allowing invalid period selections, e.g.: # echo 1 > /sys/class/pwm/pwmchip0/export # cat /sys/class/pwm/pwmchip0/pwm1/period 100 # echo 101 > /sys/class/pwm/pwmchip0/pwm1/duty_cycle [... driver may or may not reject the value, or trigger some logic bug ...] It's better to see: # echo 1 > /sys/class/pwm/pwmchip0/export # cat /sys/class/pwm/pwmchip0/pwm1/period 100 # echo 101 > /sys/class/pwm/pwmchip0/pwm1/duty_cycle -bash: echo: write error: Invalid argument This patch reintroduces some bounds checks in both pwm_config() (for its signed parameters; we don't want to convert negative values into large unsigned values) and in pwm_apply_state() (which fix the above described behavior, as well as other potential API misuses). Fixes: 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates") Signed-off-by: Brian Norris Acked-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 3 ++- include/linux/pwm.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index dba3843c53b8..ed337a8c34ab 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -457,7 +457,8 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state) { int err; - if (!pwm) + if (!pwm || !state || !state->period || + state->duty_cycle > state->period) return -EINVAL; if (!memcmp(state, &pwm->state, sizeof(*state))) diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 17018f3c066e..908b67c847cd 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -235,6 +235,9 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, if (!pwm) return -EINVAL; + if (duty_ns < 0 || period_ns < 0) + return -EINVAL; + pwm_get_state(pwm, &state); if (state.duty_cycle == duty_ns && state.period == period_ns) return 0; -- cgit v1.3-6-gb490 From ba65dc5ef16f82fba77869cecf7a7d515f61446b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 10 Jun 2016 11:32:47 -0400 Subject: much milder d_walk() race d_walk() relies upon the tree not getting rearranged under it without rename_lock being touched. And we do grab rename_lock around the places that change the tree topology. Unfortunately, branch reordering is just as bad from d_walk() POV and we have two places that do it without touching rename_lock - one in handling of cursors (for ramfs-style directories) and another in autofs. autofs one is a separate story; this commit deals with the cursors. * mark cursor dentries explicitly at allocation time * make __dentry_kill() leave ->d_child.next pointing to the next non-cursor sibling, making sure that it won't be moved around unnoticed before the parent is relocked on ascend-to-parent path in d_walk(). * make d_walk() skip cursors explicitly; strictly speaking it's not necessary (all callbacks we pass to d_walk() are no-ops on cursors), but it makes analysis easier. Signed-off-by: Al Viro --- fs/dcache.c | 58 ++++++++++++++++++++++++++++++++++++++++++++------ fs/internal.h | 1 + fs/libfs.c | 4 +--- include/linux/dcache.h | 1 + 4 files changed, 55 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 817c243c1ff1..b7eddfd35aa5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent) +{ + struct dentry *next; + /* + * Inform d_walk() and shrink_dentry_list() that we are no longer + * attached to the dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (unlikely(list_empty(&dentry->d_child))) + return; + __list_del_entry(&dentry->d_child); + /* + * Cursors can move around the list of children. While we'd been + * a normal list member, it didn't matter - ->d_child.next would've + * been updated. However, from now on it won't be and for the + * things like d_walk() it might end up with a nasty surprise. + * Normally d_walk() doesn't care about cursors moving around - + * ->d_lock on parent prevents that and since a cursor has no children + * of its own, we get through it without ever unlocking the parent. + * There is one exception, though - if we ascend from a child that + * gets killed as soon as we unlock it, the next sibling is found + * using the value left in its ->d_child.next. And if _that_ + * pointed to a cursor, and cursor got moved (e.g. by lseek()) + * before d_walk() regains parent->d_lock, we'll end up skipping + * everything the cursor had been moved past. + * + * Solution: make sure that the pointer left behind in ->d_child.next + * points to something that won't be moving around. I.e. skip the + * cursors. + */ + while (dentry->d_child.next != &parent->d_subdirs) { + next = list_entry(dentry->d_child.next, struct dentry, d_child); + if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR))) + break; + dentry->d_child.next = next->d_child.next; + } +} + static void __dentry_kill(struct dentry *dentry) { struct dentry *parent = NULL; @@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry) } /* if it was on the hash then remove it */ __d_drop(dentry); - __list_del_entry(&dentry->d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; + dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1203,6 +1236,9 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); ret = enter(data, dentry); @@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) } EXPORT_SYMBOL(d_alloc); +struct dentry *d_alloc_cursor(struct dentry * parent) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, NULL); + if (dentry) { + dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR; + dentry->d_parent = dget(parent); + } + return dentry; +} + /** * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems) * @sb: the superblock diff --git a/fs/internal.h b/fs/internal.h index b71deeecea17..f57ced528cde 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool); extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); +extern struct dentry *d_alloc_cursor(struct dentry *); /* * read_write.c diff --git a/fs/libfs.c b/fs/libfs.c index 3db2721144c2..cedeacbae303 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup); int dcache_dir_open(struct inode *inode, struct file *file) { - static struct qstr cursor_name = QSTR_INIT(".", 1); - - file->private_data = d_alloc(file->f_path.dentry, &cursor_name); + file->private_data = d_alloc_cursor(file->f_path.dentry); return file->private_data ? 0 : -ENOMEM; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 484c8792da82..bcd0c64e3ed8 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -212,6 +212,7 @@ struct dentry_operations { #define DCACHE_OP_REAL 0x08000000 #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ +#define DCACHE_DENTRY_CURSOR 0x20000000 extern seqlock_t rename_lock; -- cgit v1.3-6-gb490 From d50039ea5ee63c589b0434baa5ecf6e5075bb6f9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 16 May 2016 17:03:42 -0400 Subject: nfsd4/rpc: move backchannel create logic into rpc code Also simplify the logic a bit. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust --- fs/nfsd/nfs4callback.c | 18 +----------------- include/linux/sunrpc/clnt.h | 2 -- net/sunrpc/clnt.c | 12 ++++++++++-- 3 files changed, 11 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7389cb1d7409..04c68d900324 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc } } -static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) -{ - struct rpc_xprt *xprt; - - if (args->protocol != XPRT_TRANSPORT_BC_TCP) - return rpc_create(args); - - xprt = args->bc_xprt->xpt_bc_xprt; - if (xprt) { - xprt_get(xprt); - return rpc_create_xprt(args, xprt); - } - - return rpc_create(args); -} - static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { int maxtime = max_cb_time(clp->net); @@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ - client = create_backchannel_client(&args); + client = rpc_create(&args); if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 19c659d1c0f8..b6810c92b8bb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -137,8 +137,6 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, - struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 173c5dd2d751..b33721d41adc 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -446,7 +446,7 @@ out_no_rpciod: return ERR_PTR(err); } -struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, +static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_xprt *xprt) { struct rpc_clnt *clnt = NULL; @@ -484,7 +484,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, return clnt; } -EXPORT_SYMBOL_GPL(rpc_create_xprt); /** * rpc_create - create an RPC client and transport with one call @@ -510,6 +509,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) }; char servername[48]; + if (args->bc_xprt) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xprt = args->bc_xprt->xpt_bc_xprt; + if (xprt) { + xprt_get(xprt); + return rpc_create_xprt(args, xprt); + } + } + if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS) xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS; if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT) -- cgit v1.3-6-gb490 From 39a9beab5acb83176e8b9a4f0778749a09341f1f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 17 May 2016 12:38:21 -0400 Subject: rpc: share one xps between all backchannels The spec allows backchannels for multiple clients to share the same tcp connection. When that happens, we need to use the same xprt for all of them. Similarly, we need the same xps. This fixes list corruption introduced by the multipath code. Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields Acked-by: Trond Myklebust --- include/linux/sunrpc/svc_xprt.h | 1 + include/linux/sunrpc/xprt.h | 1 + net/sunrpc/clnt.c | 18 ++++++++++++++---- net/sunrpc/svc_xprt.c | 2 ++ net/sunrpc/xprtsock.c | 1 + 5 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index b7dabc4baafd..79ba50856707 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -84,6 +84,7 @@ struct svc_xprt { struct net *xpt_net; struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */ }; static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 5aa3834619a8..5e3e1b63dbb3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -297,6 +297,7 @@ struct xprt_create { size_t addrlen; const char *servername; struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ + struct rpc_xprt_switch *bc_xps; unsigned int flags; }; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b33721d41adc..2808d550d273 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -452,10 +452,20 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, struct rpc_clnt *clnt = NULL; struct rpc_xprt_switch *xps; - xps = xprt_switch_alloc(xprt, GFP_KERNEL); - if (xps == NULL) { - xprt_put(xprt); - return ERR_PTR(-ENOMEM); + if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) { + WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP); + xps = args->bc_xprt->xpt_bc_xps; + xprt_switch_get(xps); + } else { + xps = xprt_switch_alloc(xprt, GFP_KERNEL); + if (xps == NULL) { + xprt_put(xprt); + return ERR_PTR(-ENOMEM); + } + if (xprt->bc_xprt) { + xprt_switch_get(xps); + xprt->bc_xprt->xpt_bc_xps = xps; + } } clnt = rpc_new_client(args, xps, xprt, NULL); if (IS_ERR(clnt)) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f5572e31d518..4f01f63102ee 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -136,6 +136,8 @@ static void svc_xprt_free(struct kref *kref) /* See comment on corresponding get in xs_setup_bc_tcp(): */ if (xprt->xpt_bc_xprt) xprt_put(xprt->xpt_bc_xprt); + if (xprt->xpt_bc_xps) + xprt_switch_put(xprt->xpt_bc_xps); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2d3e0c42361e..7e2b2fa189c3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3057,6 +3057,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) return xprt; args->bc_xprt->xpt_bc_xprt = NULL; + args->bc_xprt->xpt_bc_xps = NULL; xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: -- cgit v1.3-6-gb490 From 3a4955111ad46a022f05b51f91306d864f989625 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 27 May 2016 18:08:27 -0400 Subject: isa: Allow ISA-style drivers on modern systems Several modern devices, such as PC/104 cards, are expected to run on modern systems via an ISA bus interface. Since ISA is a legacy interface for most modern architectures, ISA support should remain disabled in general. Support for ISA-style drivers should be enabled on a per driver basis. To allow ISA-style drivers on modern systems, this patch introduces the ISA_BUS_API and ISA_BUS Kconfig options. The ISA bus driver will now build conditionally on the ISA_BUS_API Kconfig option, which defaults to the legacy ISA Kconfig option. The ISA_BUS Kconfig option allows the ISA_BUS_API Kconfig option to be selected on architectures which do not enable ISA (e.g. X86_64). The ISA_BUS Kconfig option is currently only implemented for X86 architectures. Other architectures may have their own ISA_BUS Kconfig options added as required. Reviewed-by: Guenter Roeck Signed-off-by: William Breathitt Gray Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/Kconfig | 3 +++ arch/x86/Kconfig | 9 +++++++++ drivers/base/Makefile | 2 +- include/linux/isa.h | 2 +- 4 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index d794384a0404..e9734796531f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -606,6 +606,9 @@ config HAVE_ARCH_HASH file which provides platform-specific implementations of some functions in or fs/namei.c. +config ISA_BUS_API + def_bool ISA + # # ABI hall of shame # diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0a7b885964ba..d9a94da0c29f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,15 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" +config ISA_BUS + bool "ISA-style bus support on modern systems" if EXPERT + select ISA_BUS_API + help + Enables ISA-style drivers on modern systems. This is necessary to + support PC/104 devices on X86_64 platforms. + + If unsure, say N. + # x86_64 have no ISA slots, but can have ISA-style DMA. config ISA_DMA_API bool "ISA-style DMA support" if (X86_64 && EXPERT) diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 6b2a84e7f2be..2609ba20b396 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o -obj-$(CONFIG_ISA) += isa.o +obj-$(CONFIG_ISA_BUS_API) += isa.o obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o diff --git a/include/linux/isa.h b/include/linux/isa.h index 5ab85281230b..384ab9b7d79a 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -22,7 +22,7 @@ struct isa_driver { #define to_isa_driver(x) container_of((x), struct isa_driver, driver) -#ifdef CONFIG_ISA +#ifdef CONFIG_ISA_BUS_API int isa_register_driver(struct isa_driver *, unsigned int); void isa_unregister_driver(struct isa_driver *); #else -- cgit v1.3-6-gb490 From 5e25db870ec983be138b343a3d04c79a5c1f1703 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 9 May 2016 09:39:50 -0400 Subject: isa: Dummy isa_register_driver should return error code The inline isa_register_driver stub simply allows compilation on systems with CONFIG_ISA disabled; the dummy isa_register_driver does not register an isa_driver at all. The inline isa_register_driver should return -ENODEV to indicate lack of support when attempting to register an isa_driver on such a system with CONFIG_ISA disabled. Cc: Matthew Wilcox Reported-by: Sasha Levin Tested-by: Ye Xiaolong Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman --- include/linux/isa.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/isa.h b/include/linux/isa.h index 384ab9b7d79a..f2d0258414cf 100644 --- a/include/linux/isa.h +++ b/include/linux/isa.h @@ -6,6 +6,7 @@ #define __LINUX_ISA_H #include +#include #include struct isa_driver { @@ -28,7 +29,7 @@ void isa_unregister_driver(struct isa_driver *); #else static inline int isa_register_driver(struct isa_driver *d, unsigned int i) { - return 0; + return -ENODEV; } static inline void isa_unregister_driver(struct isa_driver *d) -- cgit v1.3-6-gb490 From c9b254955b9f8814966f5dabd34c39d0e0a2b437 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:26 +0300 Subject: IB/mlx5: Fix post send fence logic If the caller specified IB_SEND_FENCE in the send flags of the work request and no previous work request stated that the successive one should be fenced, the work request would be executed without a fence. This could result in RDMA read or atomic operations failure due to a MR being invalidated. Fix this by adding the mlx5 enumeration for fencing RDMA/atomic operations and fix the logic to apply this. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++--- include/linux/mlx5/qp.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce434228a5ea..ce0a7ab35a22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; - - } else { - return 0; + } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { + return MLX5_FENCE_MODE_FENCE; } + + return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e4e29882fdfd..630f66a186b7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -172,6 +172,7 @@ enum { enum { MLX5_FENCE_MODE_NONE = 0 << 5, MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_FENCE = 2 << 5, MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -- cgit v1.3-6-gb490 From 4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 21 Jun 2016 18:52:17 +0200 Subject: locking/static_key: Fix concurrent static_key_slow_inc() The following scenario is possible: CPU 1 CPU 2 static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 0, no increment jump_label_lock() atomic_inc_return() -> key.enabled == 1 now static_key_slow_inc() atomic_inc_not_zero() -> key.enabled == 1, inc to 2 return ** static key is wrong! jump_label_update() jump_label_unlock() Testing the static key at the point marked by (**) will follow the wrong path for jumps that have not been patched yet. This can actually happen when creating many KVM virtual machines with userspace LAPIC emulation; just run several copies of the following program: #include #include #include #include int main(void) { for (;;) { int kvmfd = open("/dev/kvm", O_RDONLY); int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0); close(ioctl(vmfd, KVM_CREATE_VCPU, 1)); close(vmfd); close(kvmfd); } return 0; } Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call. The static key's purpose is to skip NULL pointer checks and indeed one of the processes eventually dereferences NULL. As explained in the commit that introduced the bug: 706249c222f6 ("locking/static_keys: Rework update logic") jump_label_update() needs key.enabled to be true. The solution adopted here is to temporarily make key.enabled == -1, and use go down the slow path when key.enabled <= 0. Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Peter Zijlstra (Intel) Cc: # v4.3+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 706249c222f6 ("locking/static_keys: Rework update logic") Link: http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonzini@redhat.com [ Small stylistic edits to the changelog and the code. ] Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 16 +++++++++++++--- kernel/jump_label.c | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0536524bb9eb..68904469fba1 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -117,13 +117,18 @@ struct module; #include +#ifdef HAVE_JUMP_LABEL + static inline int static_key_count(struct static_key *key) { - return atomic_read(&key->enabled); + /* + * -1 means the first static_key_slow_inc() is in progress. + * static_key_enabled() must return true, so return 1 here. + */ + int n = atomic_read(&key->enabled); + return n >= 0 ? n : 1; } -#ifdef HAVE_JUMP_LABEL - #define JUMP_TYPE_FALSE 0UL #define JUMP_TYPE_TRUE 1UL #define JUMP_TYPE_MASK 1UL @@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + static __always_inline void jump_label_init(void) { static_key_initialized = true; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 05254eeb4b4e..4b353e0be121 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key); void static_key_slow_inc(struct static_key *key) { + int v, v1; + STATIC_KEY_CHECK_USE(); - if (atomic_inc_not_zero(&key->enabled)) - return; + + /* + * Careful if we get concurrent static_key_slow_inc() calls; + * later calls must wait for the first one to _finish_ the + * jump_label_update() process. At the same time, however, + * the jump_label_update() call below wants to see + * static_key_enabled(&key) for jumps to be updated properly. + * + * So give a special meaning to negative key->enabled: it sends + * static_key_slow_inc() down the slow path, and it is non-zero + * so it counts as "enabled" in jump_label_update(). Note that + * atomic_inc_unless_negative() checks >= 0, so roll our own. + */ + for (v = atomic_read(&key->enabled); v > 0; v = v1) { + v1 = atomic_cmpxchg(&key->enabled, v, v + 1); + if (likely(v1 == v)) + return; + } jump_label_lock(); - if (atomic_inc_return(&key->enabled) == 1) + if (atomic_read(&key->enabled) == 0) { + atomic_set(&key->enabled, -1); jump_label_update(key); + atomic_set(&key->enabled, 1); + } else { + atomic_inc(&key->enabled); + } jump_label_unlock(); } EXPORT_SYMBOL_GPL(static_key_slow_inc); @@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc); static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { + /* + * The negative count check is valid even when a negative + * key->enabled is in use by static_key_slow_inc(); a + * __static_key_slow_dec() before the first static_key_slow_inc() + * returns is unbalanced, because all other static_key_slow_inc() + * instances block while the update is in progress. + */ if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { WARN(atomic_read(&key->enabled) < 0, "jump label: negative count!\n"); -- cgit v1.3-6-gb490 From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 15:09:37 -0700 Subject: Clarify naming of thread info/stack allocators We've had the thread info allocated together with the thread stack for most architectures for a long time (since the thread_info was split off from the task struct), but that is about to change. But the patches that move the thread info to be off-stack (and a part of the task struct instead) made it clear how confused the allocator and freeing functions are. Because the common case was that we share an allocation with the thread stack and the thread_info, the two pointers were identical. That identity then meant that we would have things like ti = alloc_thread_info_node(tsk, node); ... tsk->stack = ti; which certainly _worked_ (since stack and thread_info have the same value), but is rather confusing: why are we assigning a thread_info to the stack? And if we move the thread_info away, the "confusing" code just gets to be entirely bogus. So remove all this confusion, and make it clear that we are doing the stack allocation by renaming and clarifying the function names to be about the stack. The fact that the thread_info then shares the allocation is an implementation detail, and not really about the allocation itself. This is a pure renaming and type fix: we pass in the same pointer, it's just that we clarify what the pointer means. The ia64 code that actually only has one single allocation (for all of task_struct, thread_info and kernel thread stack) now looks a bit odd, but since "tsk->stack" is actually not even used there, that oddity doesn't matter. It would be a separate thing to clean that up, I intentionally left the ia64 changes as a pure brute-force renaming and type change. Acked-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 +-- arch/ia64/Kconfig | 2 +- arch/ia64/include/asm/thread_info.h | 8 +++--- arch/mn10300/include/asm/thread_info.h | 2 +- arch/mn10300/kernel/kgdb.c | 3 +- arch/tile/include/asm/thread_info.h | 2 +- arch/tile/kernel/process.c | 3 +- include/linux/sched.h | 2 +- init/main.c | 4 +-- kernel/fork.c | 50 +++++++++++++++++----------------- 10 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index e9734796531f..15996290fed4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -226,8 +226,8 @@ config ARCH_INIT_TASK config ARCH_TASK_STRUCT_ALLOCATOR bool -# Select if arch has its private alloc_thread_info() function -config ARCH_THREAD_INFO_ALLOCATOR +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR bool # Select if arch wants to size task_struct dynamically via arch_task_struct_size: diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index f80758cb7157..e109ee95e919 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -45,7 +45,7 @@ config IA64 select GENERIC_SMP_IDLE_THREAD select ARCH_INIT_TASK select ARCH_TASK_STRUCT_ALLOCATOR - select ARCH_THREAD_INFO_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR select ARCH_CLOCKSOURCE_DATA select GENERIC_TIME_VSYSCALL_OLD select SYSCTL_ARCH_UNALIGN_NO_WARN diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index aa995b67c3f5..d1212b84fb83 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,15 +48,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_info_node(tsk, node) \ - ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_info_node(tsk, node) ((struct thread_info *) 0) +#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_info(ti) /* nothing */ +#define free_thread_stack(ti) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 4861a78c7160..f5f90bbf019d 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void) } #ifndef CONFIG_KGDB -void arch_release_thread_info(struct thread_info *ti); +void arch_release_thread_stack(unsigned long *stack); #endif #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c index 99770823451a..2d7986c386fe 100644 --- a/arch/mn10300/kernel/kgdb.c +++ b/arch/mn10300/kernel/kgdb.c @@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs) * single-step state is cleared. At this point the breakpoints should have * been removed by __switch_to(). */ -void arch_release_thread_info(struct thread_info *ti) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *ti = (void *)stack; if (kgdb_sstep_thread == ti) { kgdb_sstep_thread = NULL; diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 4b7cef9e94e0..c1467ac59ce6 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -78,7 +78,7 @@ struct thread_info { #ifndef __ASSEMBLY__ -void arch_release_thread_info(struct thread_info *info); +void arch_release_thread_stack(unsigned long *stack); /* How to get the thread information struct from C. */ register unsigned long stack_pointer __asm__("sp"); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 6b705ccc9cc1..a465d8372edd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -73,8 +73,9 @@ void arch_cpu_idle(void) /* * Release a thread_info structure */ -void arch_release_thread_info(struct thread_info *info) +void arch_release_thread_stack(unsigned long *stack) { + struct thread_info *info = (void *)stack; struct single_step_state *step_state = info->step_state; if (step_state) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e42ada26345..253538f29ade 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj) return (obj >= stack) && (obj < (stack + THREAD_SIZE)); } -extern void thread_info_cache_init(void); +extern void thread_stack_cache_init(void); #ifdef CONFIG_DEBUG_STACK_USAGE static inline unsigned long stack_not_used(struct task_struct *p) diff --git a/init/main.c b/init/main.c index 4c17fda5c2ff..826fd57fa3aa 100644 --- a/init/main.c +++ b/init/main.c @@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void) } # if THREAD_SIZE >= PAGE_SIZE -void __init __weak thread_info_cache_init(void) +void __init __weak thread_stack_cache_init(void) { } #endif @@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void) /* Should be run before the first non-init thread is created */ init_espfix_bsp(); #endif - thread_info_cache_init(); + thread_stack_cache_init(); cred_init(); fork_init(); proc_caches_init(); diff --git a/kernel/fork.c b/kernel/fork.c index 5c2c355aa97f..37b9439b8c07 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_info(struct thread_info *ti) +void __weak arch_release_thread_stack(unsigned long *stack) { } -#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a * kmemcache based allocator. */ # if THREAD_SIZE >= PAGE_SIZE -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) { struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, @@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, return page ? page_address(page) : NULL; } -static inline void free_thread_info(struct thread_info *ti) +static inline void free_thread_stack(unsigned long *stack) { - struct page *page = virt_to_page(ti); + struct page *page = virt_to_page(stack); memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK, -(1 << THREAD_SIZE_ORDER)); __free_kmem_pages(page, THREAD_SIZE_ORDER); } # else -static struct kmem_cache *thread_info_cache; +static struct kmem_cache *thread_stack_cache; -static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, +static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk, int node) { - return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node); + return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node); } -static void free_thread_info(struct thread_info *ti) +static void free_stack(unsigned long *stack) { - kmem_cache_free(thread_info_cache, ti); + kmem_cache_free(thread_stack_cache, stack); } -void thread_info_cache_init(void) +void thread_stack_cache_init(void) { - thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, THREAD_SIZE, 0, NULL); - BUG_ON(thread_info_cache == NULL); + BUG_ON(thread_stack_cache == NULL); } # endif #endif @@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -static void account_kernel_stack(struct thread_info *ti, int account) +static void account_kernel_stack(unsigned long *stack, int account) { - struct zone *zone = page_zone(virt_to_page(ti)); + struct zone *zone = page_zone(virt_to_page(stack)); mod_zone_page_state(zone, NR_KERNEL_STACK, account); } @@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { account_kernel_stack(tsk->stack, -1); - arch_release_thread_info(tsk->stack); - free_thread_info(tsk->stack); + arch_release_thread_stack(tsk->stack); + free_thread_stack(tsk->stack); rt_mutex_debug_task_free(tsk); ftrace_graph_exit_task(tsk); put_seccomp_filter(tsk); @@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk) static struct task_struct *dup_task_struct(struct task_struct *orig, int node) { struct task_struct *tsk; - struct thread_info *ti; + unsigned long *stack; int err; if (node == NUMA_NO_NODE) @@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (!tsk) return NULL; - ti = alloc_thread_info_node(tsk, node); - if (!ti) + stack = alloc_thread_stack_node(tsk, node); + if (!stack) goto free_tsk; err = arch_dup_task_struct(tsk, orig); if (err) - goto free_ti; + goto free_stack; - tsk->stack = ti; + tsk->stack = stack; #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; - account_kernel_stack(ti, 1); + account_kernel_stack(stack, 1); kcov_task_init(tsk); return tsk; -free_ti: - free_thread_info(ti); +free_stack: + free_thread_stack(stack); free_tsk: free_task_struct(tsk); return NULL; -- cgit v1.3-6-gb490 From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 24 Jun 2016 17:07:33 -0700 Subject: fix up initial thread stack pointer vs thread_info confusion The INIT_TASK() initializer was similarly confused about the stack vs thread_info allocation that the allocators had, and that were fixed in commit b235beea9e99 ("Clarify naming of thread info/stack allocators"). The task ->stack pointer only incidentally ends up having the same value as the thread_info, and in fact that will change. So fix the initial task struct initializer to point to 'init_stack' instead of 'init_thread_info', and make sure the ia64 definition for that exists. This actually makes the ia64 tsk->stack pointer be sensible for the initial task, but not for any other task. As mentioned in commit b235beea9e99, that whole pointer isn't actually used on ia64, since task_stack_page() there just points to the (single) allocation. All the other architectures seem to have copied the 'init_stack' definition, even if it tended to be generally unusued. Signed-off-by: Linus Torvalds --- arch/ia64/kernel/init_task.c | 1 + include/linux/init_task.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c index f9efe9739d3f..0eaa89f3defd 100644 --- a/arch/ia64/kernel/init_task.c +++ b/arch/ia64/kernel/init_task.c @@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); * handled. This is done by having a special ".data..init_task" section... */ #define init_thread_info init_task_mem.s.thread_info +#define init_stack init_task_mem.stack union { struct { diff --git a/include/linux/init_task.h b/include/linux/init_task.h index f2cb8d45513d..f8834f820ec2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -190,7 +190,7 @@ extern struct task_group root_task_group; #define INIT_TASK(tsk) \ { \ .state = 0, \ - .stack = &init_thread_info, \ + .stack = init_stack, \ .usage = ATOMIC_INIT(2), \ .flags = PF_KTHREAD, \ .prio = MAX_PRIO-20, \ -- cgit v1.3-6-gb490 From 9b75a867cc9ddbafcaf35029358ac500f2635ff3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 24 Jun 2016 14:49:34 -0700 Subject: mm: mempool: kasan: don't poot mempool objects in quarantine Currently we may put reserved by mempool elements into quarantine via kasan_kfree(). This is totally wrong since quarantine may really free these objects. So when mempool will try to use such element, use-after-free will happen. Or mempool may decide that it no longer need that element and double-free it. So don't put object into quarantine in kasan_kfree(), just poison it. Rename kasan_kfree() to kasan_poison_kfree() to respect that. Also, we shouldn't use kasan_slab_alloc()/kasan_krealloc() in kasan_unpoison_element() because those functions may update allocation stacktrace. This would be wrong for the most of the remove_element call sites. (The only call site where we may want to update alloc stacktrace is in mempool_alloc(). Kmemleak solves this by calling kmemleak_update_trace(), so we could make something like that too. But this is out of scope of this patch). Fixes: 55834c59098d ("mm: kasan: initial memory quarantine implementation") Link: http://lkml.kernel.org/r/575977C3.1010905@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Kuthonuzo Luruo Acked-by: Alexander Potapenko Cc: Dmitriy Vyukov Cc: Kostya Serebryany Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 11 +++++++---- mm/kasan/kasan.c | 6 +++--- mm/mempool.c | 12 ++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 611927f5870d..ac4b3c46a84d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object); void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags); void kasan_kfree_large(const void *ptr); -void kasan_kfree(void *ptr); +void kasan_poison_kfree(void *ptr); void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags); void kasan_krealloc(const void *object, size_t new_size, gfp_t flags); void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags); bool kasan_slab_free(struct kmem_cache *s, void *object); -void kasan_poison_slab_free(struct kmem_cache *s, void *object); struct kasan_cache { int alloc_meta_offset; @@ -76,6 +75,9 @@ struct kasan_cache { int kasan_module_alloc(void *addr, size_t size); void kasan_free_shadow(const struct vm_struct *vm); +size_t ksize(const void *); +static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache, static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {} static inline void kasan_kfree_large(const void *ptr) {} -static inline void kasan_kfree(void *ptr) {} +static inline void kasan_poison_kfree(void *ptr) {} static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size, gfp_t flags) {} static inline void kasan_krealloc(const void *object, size_t new_size, @@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object) { return false; } -static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline void kasan_unpoison_slab(const void *ptr) { } + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 28439acda6ec..6845f9294696 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags) kasan_kmalloc(cache, object, cache->object_size, flags); } -void kasan_poison_slab_free(struct kmem_cache *cache, void *object) +static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) { unsigned long size = cache->object_size; unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); @@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags) kasan_kmalloc(page->slab_cache, object, size, flags); } -void kasan_kfree(void *ptr) +void kasan_poison_kfree(void *ptr) { struct page *page; @@ -636,7 +636,7 @@ void kasan_kfree(void *ptr) kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), KASAN_FREE_PAGE); else - kasan_slab_free(page->slab_cache, ptr); + kasan_poison_slab_free(page->slab_cache, ptr); } void kasan_kfree_large(const void *ptr) diff --git a/mm/mempool.c b/mm/mempool.c index 9e075f829d0d..8f65464da5de 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element) static void kasan_poison_element(mempool_t *pool, void *element) { - if (pool->alloc == mempool_alloc_slab) - kasan_poison_slab_free(pool->pool_data, element); - if (pool->alloc == mempool_kmalloc) - kasan_kfree(element); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_poison_kfree(element); if (pool->alloc == mempool_alloc_pages) kasan_free_pages(element, (unsigned long)pool->pool_data); } static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags) { - if (pool->alloc == mempool_alloc_slab) - kasan_slab_alloc(pool->pool_data, element, flags); - if (pool->alloc == mempool_kmalloc) - kasan_krealloc(element, (size_t)pool->pool_data, flags); + if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) + kasan_unpoison_slab(element); if (pool->alloc == mempool_alloc_pages) kasan_alloc_pages(element, (unsigned long)pool->pool_data); } -- cgit v1.3-6-gb490 From 315d09bf30c2b436a1fdac86d31c24380cd56c4f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Jun 2016 14:49:45 -0700 Subject: Revert "mm: make faultaround produce old ptes" This reverts commit 5c0a85fad949212b3e059692deecdeed74ae7ec7. The commit causes ~6% regression in unixbench. Let's revert it for now and consider other solution for reclaim problem later. Link: http://lkml.kernel.org/r/1465893750-44080-2-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: "Huang, Ying" Cc: Linus Torvalds Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Minchan Kim Cc: Vinayak Menon Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/filemap.c | 2 +- mm/memory.c | 23 +++++------------------ 3 files changed, 7 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5df5feb49575..ece042dfe23c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -602,7 +602,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) } void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old); + struct page *page, pte_t *pte, bool write, bool anon); #endif /* diff --git a/mm/filemap.c b/mm/filemap.c index 00ae878b2a38..20f3b1f33f0e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2186,7 +2186,7 @@ repeat: if (file->f_ra.mmap_miss > 0) file->f_ra.mmap_miss--; addr = address + (page->index - vmf->pgoff) * PAGE_SIZE; - do_set_pte(vma, addr, page, pte, false, false, true); + do_set_pte(vma, addr, page, pte, false, false); unlock_page(page); goto next; unlock: diff --git a/mm/memory.c b/mm/memory.c index 15322b73636b..61fe7e7b56bf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2877,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address, * vm_ops->map_pages. */ void do_set_pte(struct vm_area_struct *vma, unsigned long address, - struct page *page, pte_t *pte, bool write, bool anon, bool old) + struct page *page, pte_t *pte, bool write, bool anon) { pte_t entry; @@ -2885,8 +2885,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, entry = mk_pte(page, vma->vm_page_prot); if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); - if (old) - entry = pte_mkold(entry); if (anon) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address, false); @@ -3032,20 +3030,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); - if (!pte_same(*pte, orig_pte)) - goto unlock_out; do_fault_around(vma, address, pte, pgoff, flags); - /* Check if the fault is handled by faultaround */ - if (!pte_same(*pte, orig_pte)) { - /* - * Faultaround produce old pte, but the pte we've - * handler fault for should be young. - */ - pte_t entry = pte_mkyoung(*pte); - if (ptep_set_access_flags(vma, address, pte, entry, 0)) - update_mmu_cache(vma, address, pte); + if (!pte_same(*pte, orig_pte)) goto unlock_out; - } pte_unmap_unlock(pte, ptl); } @@ -3060,7 +3047,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, false, false, false); + do_set_pte(vma, address, fault_page, pte, false, false); unlock_page(fault_page); unlock_out: pte_unmap_unlock(pte, ptl); @@ -3111,7 +3098,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, } goto uncharge_out; } - do_set_pte(vma, address, new_page, pte, true, true, false); + do_set_pte(vma, address, new_page, pte, true, true); mem_cgroup_commit_charge(new_page, memcg, false, false); lru_cache_add_active_or_unevictable(new_page, vma); pte_unmap_unlock(pte, ptl); @@ -3164,7 +3151,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, put_page(fault_page); return ret; } - do_set_pte(vma, address, fault_page, pte, true, false, false); + do_set_pte(vma, address, fault_page, pte, true, false); pte_unmap_unlock(pte, ptl); if (set_page_dirty(fault_page)) -- cgit v1.3-6-gb490